def test_environment():
    """Smoke-test a ViZDoom setup by playing random actions.

    Loads ``basic.cfg`` and ``basic.wad`` (paths relative to the current
    working directory), plays ten episodes picking uniformly among three
    one-hot actions, and prints every action, every reward and the total
    reward of each episode. The game is always closed on exit, including
    when an exception (or Ctrl-C) interrupts an episode.
    """
    game = DoomGame()
    # https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Deep%20Q%20Learning/Doom/basic.cfg
    game.load_config('basic.cfg')
    game.set_doom_scenario_path('basic.wad')
    game.init()

    # One-hot button vectors.
    # NOTE(review): presumably ordered [left, right, attack] to match the
    # available buttons declared in basic.cfg -- confirm against the config.
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    episodes = 10
    try:
        for _ in range(episodes):
            game.new_episode()
            while not game.is_episode_finished():
                action = random.choice(actions)
                print('Action', action)
                reward = game.make_action(action)
                print('Reward', reward)
                time.sleep(0.02)
            print('Result', game.get_total_reward())
            # Pause between episodes so the result can be read.
            time.sleep(2)
    finally:
        # Release the ViZDoom process even if an episode blew up.
        game.close()
class DoomEnv(gym.Env):
    """Gym environment wrapping a ViZDoom ``DoomGame`` at 160x120 resolution.

    Observations are ``(120, 160, 3)`` uint8 screen buffers and actions are
    ``MultiBinary(NUM_ACTIONS)`` button-indicator vectors. The level is
    configured lazily on the first ``reset()``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    # Shared tail of the error messages raised when ViZDoom fails to start.
    _LOCK_HELP = (
        'This is likely caused by a missing multiprocessing lock. ' +
        'To run VizDoom across multiple processes, you need to pass a lock when you configure the env ' +
        '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env ' +
        'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a ' +
        'singleton lock in memory.')

    # Keys written by ``_get_game_variables``; position i in this tuple is
    # read from ``state_variables[i]``.
    _GAME_VARIABLE_NAMES = (
        'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT', 'HEALTH',
        'ARMOR', 'DEAD', 'ON_GROUND', 'ATTACK_READY', 'ALTATTACK_READY',
        'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO', 'AMMO1', 'AMMO2', 'AMMO3',
        'AMMO4', 'AMMO5', 'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0',
    )

    def __init__(self, level):
        """Create the environment for ``level`` (an index into ``DOOM_SETTINGS``)."""
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self._mode = 'algo'  # 'algo' or 'human'
        self.no_render = False  # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False  # Indicates that reset() has been called
        # reset() reads this flag, so it must exist before the first
        # _load_level(); close() sets it so a later reset() reloads the game.
        self._closed = False
        self.curr_seed = 0
        self.lock = (DoomLock()).get_lock()
        # self.action_space = spaces.Discrete(43) # used to be in the old code
        self.action_space = spaces.MultiBinary(NUM_ACTIONS)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self.screen_height = 120
        self.screen_width = 160
        self.screen_resolution = ScreenResolution.RES_160X120
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(self.screen_height, self.screen_width, 3),
            dtype=np.uint8)
        self.seed()
        self._configure()

    def _configure(self, lock=None, **kwargs):
        """Optionally replace the multiprocessing lock guarding init/close.

        A ``screen_resolution`` keyword is accepted only to emit a
        deprecation warning; it is otherwise ignored.
        """
        if 'screen_resolution' in kwargs:
            logger.warn(
                'Deprecated - Screen resolution must now be set using a wrapper. See documentation for details.'
            )
        # Multiprocessing lock
        if lock is not None:
            self.lock = lock

    def _blank_observation(self):
        """Return an all-zero frame shaped like ``observation_space``."""
        return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def _load_level(self):
        """(Re)create the game, configure the level, start it and return the first frame.

        In 'algo' mode the first screen buffer is returned. In 'human' mode
        the episode is played interactively to completion and a blank frame
        is returned.

        Raises:
            error.Error: if ViZDoom exits or errors during ``init()`` in
                'algo' mode.
        """
        # Closing if is_initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customizing level
        if getattr(self, '_customize_game', None) is not None and callable(
                self._customize_game):
            self.level = -1
            self._customize_game()
        else:
            # Loading Paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common settings
            settings = DOOM_SETTINGS[self.level]
            self.game.load_config(
                os.path.join(self.doom_dir, 'assets/%s' % settings[CONFIG]))
            self.game.set_doom_scenario_path(
                self.loader.get_scenario_path(settings[SCENARIO]))
            if settings[MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in settings[CONFIG].lower():
                    if 'fix' in settings[SCENARIO].lower():
                        # mapId = 'map%02d'%np.random.randint(1, 23)
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(
                            1, RANDOMIZE_MAPS + 1)
                    print(
                        '\t=> Special Config: Randomly Loading Maps. MapID = '
                        + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' + settings[MAP])
                    self.game.set_doom_map(settings[MAP])
            self.game.set_doom_skill(settings[DIFFICULTY])
            self.allowed_actions = settings[ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        if NO_MONSTERS:
            print('\t=> Special Config: Monsters Removed.')
            self.game.add_game_args('-nomonsters 1')

        # Algo mode
        if 'human' != self._mode:
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error(
                    'VizDoom exited unexpectedly. ' + self._LOCK_HELP)
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().screen_buffer.copy()

        # Human mode
        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.no_render = True
        with self.lock:
            self.game.init()
        self._start_episode()
        self.is_initialized = True
        self._play_human_mode()
        return self._blank_observation()

    def _start_episode(self):
        """Start a new episode, applying (and then clearing) a pending seed."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()
        return

    def _play_human_mode(self):
        """Advance the game until the episode ends, printing per-frame stats."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def _apply_action(self, list_action):
        """Send ``list_action`` to the game and build the gym step tuple.

        Returns:
            ``(observation, reward, done, info)``. When the episode has
            finished, or the game is no longer running, the observation is
            a blank frame; in the not-running case reward is 0 and info is
            empty.
        """
        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                return self._blank_observation(), reward, True, info
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
            return state.screen_buffer.copy(), reward, False, info
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return self._blank_observation(), 0, True, {}

    def old_step(self, action):
        """Legacy step taking a single button index instead of an indicator vector.

        Args:
            action: an index into the ``NUM_ACTIONS`` buttons (the old
                ``Discrete(43)`` action space). It is expanded to a one-hot
                vector, so only one button can be pressed per step, and
                then restricted to ``self.allowed_actions``.

        Returns:
            ``(observation, reward, done, info)`` as for ``step``.
        """
        # Convert to array
        action_arr = np.zeros(NUM_ACTIONS, dtype=int)
        action_arr[action] = 1
        action = action_arr
        assert self.is_initialized, "Doom env not reset, call .reset()"
        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [
                int(action[action_idx]) for action_idx in self.allowed_actions
            ]
        else:
            list_action = [int(x) for x in action]
        return self._apply_action(list_action)

    def step(self, action):
        """Perform one game step.

        Args:
            action: iterable of button indicators (one entry per button);
                every entry is converted to ``int`` and the whole vector is
                passed to ``DoomGame.make_action`` unchanged.

        Returns:
            ``(observation, reward, done, info)``.
        """
        return self._apply_action([int(x) for x in action])

    def reset(self):
        """Start a new episode and return its first frame.

        The level is (re)loaded when the env was never initialised or has
        been closed; otherwise only a new episode is started.

        Raises:
            error.Error: if no screen buffer is available after starting
                the episode.
        """
        if not self.is_initialized or self._closed:
            return self._load_level()
        self._start_episode()
        screen_buffer = self.game.get_state().screen_buffer
        if screen_buffer is None:
            raise error.Error(
                'VizDoom incorrectly initiated. ' + self._LOCK_HELP)
        return screen_buffer.copy()

    def render(self, mode='human', close=False):
        """Render the current frame.

        Returns the frame for ``mode='rgb_array'``; shows it in a
        ``SimpleImageViewer`` for ``mode='human'`` (unless human-mode play
        already renders its own window). With ``close=True`` only the
        viewer is closed. A blank frame is returned when the game is not
        running or has no state.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.screen_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = self._blank_observation()
            if mode == 'rgb_array':
                return img
            elif mode == 'human':  # was `is`: identity test on a str literal
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except (vizdoom.vizdoom.ViZDoomIsNotRunningException, AttributeError):
            # AttributeError covers get_state() returning None.
            return self._blank_observation()

    def close(self):
        """Close the game process and mark the env closed so ``reset()`` reloads it."""
        # Lock required for VizDoom to close processes properly
        with self.lock:
            self.game.close()
        self._closed = True

    def seed(self, seed=None):
        """Store a 32-bit seed derived from ``seed``; it is applied at the next episode start."""
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        """Map the positional ``state_variables`` to a named info dict.

        Always contains ``"LEVEL"``. When ``state_variables`` is ``None``
        nothing else is added; otherwise the first 22 entries are stored
        under the names in ``_GAME_VARIABLE_NAMES`` (a shorter sequence
        raises ``IndexError``).
        """
        info = {"LEVEL": self.level}
        if state_variables is None:
            return info
        for idx, name in enumerate(self._GAME_VARIABLE_NAMES):
            info[name] = state_variables[idx]
        return info
class Game(object):
    """Wrapper around a ViZDoom ``DoomGame`` run in deathmatch mode.

    Holds the game configuration, tracks the player's game variables
    between steps (``properties`` / ``prev_properties``) and accumulates
    per-map statistics in ``self.statistics``.
    """

    def __init__(self,
                 scenario,
                 action_builder,
                 score_variable='FRAGCOUNT',
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 labels_mapping='',
                 game_features='',
                 mode='ASYNC_PLAYER',
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 freelook=False,
                 name='LUBAN',
                 color=0,
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.

        scenario: scenario name; ``<scenario>.wad`` must exist in
            RESOURCES_DIR.
        action_builder: object providing ``available_buttons`` and
            ``get_action``.
        score_variable: indicates in which game variable the user score is
            stored. By default it's in FRAGCOUNT, but the score in ACS
            against built-in AI bots can be stored in USER1, USER2, etc.
        freedoom: use ``freedoom2.wad`` when true, ``Doom2.wad`` otherwise.
        screen_resolution / screen_format / mode: attribute names of
            ScreenResolution / ScreenFormat / Mode respectively.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        name: non-blank agent name (stripped before use).
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
               5 - light brown, 6 - light red, 7 - light blue
        n_bots: number of bots (>= 0).
        use_scripted_marines: bool, or None (allowed only when n_bots == 0).
        doom_skill: integer in [0, 4]; ``start`` passes ``doom_skill + 1``
            to ViZDoom.

        All parameter checks below are asserts, so they vanish under
        ``python -O``.
        """
        # game resources
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario_path = os.path.join(RESOURCES_DIR, '%s.wad' % scenario)
        self.game_path = os.path.join(RESOURCES_DIR, game_filename)
        print(self.scenario_path)
        print(self.game_path)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert len(name.strip()) > 0 and color in range(8)
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4

        # action builder
        self.action_builder = action_builder

        # add the score variable to the game variables list
        # NOTE(review): `game_variables` is not defined in this class
        # (presumably a module-level list); every Game() created appends
        # another ('score', ...) entry to it -- confirm this is intended.
        self.score_variable = score_variable
        game_variables.append(('score', getattr(GameVariable, score_variable)))

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.labels_mapping = parse_labels_mapping(labels_mapping)
        self.game_features = parse_game_features(game_features)
        self.use_labels_buffer = self.labels_mapping is not None
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # respawn invincibility / distance
        self.respawn_protect = respawn_protect
        self.spawn_farthest = spawn_farthest

        # freelook / agent name / agent color
        self.freelook = freelook
        self.name = name.strip()
        self.color = color

        # window visibility
        self.visible = visible

        # game statistics
        self.stat_keys = [
            'distance', 'kills', 'deaths', 'suicides', 'frags', 'k/d',
            'medikits', 'armors', 'pistol', 'shotgun', 'chaingun',
            'rocketlauncher', 'plasmarifle', 'bfg9000', 'bullets', 'shells',
            'rockets', 'cells'
        ]
        # map_id -> {stat_key: value}; an entry is created by start()
        self.statistics = {}

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill
        self.doom_skill = doom_skill

        # manual control: consecutive actions without moving forward /
        # without turning (see make_action)
        self.count_non_forward_actions = 0
        self.count_non_turn_actions = 0

    def update_game_variables(self):
        """
        Read the game variables, sanity-check them and store them.

        The previous ``self.properties`` becomes ``self.prev_properties``
        and the freshly read values become ``self.properties``.
        """
        # read game variables
        new_v = {k: self.game.get_game_variable(v) for k, v in game_variables}
        # only the position variables (*_x, *_y, *_z) may be non-integer
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']
        sel_weapon = new_v['sel_weapon']
        sel_ammo = new_v['sel_ammo']
        bullets = new_v['bullets']
        shells = new_v['shells']
        rockets = new_v['rockets']
        cells = new_v['cells']
        fist = new_v['fist']
        pistol = new_v['pistol']
        shotgun = new_v['shotgun']
        chaingun = new_v['chaingun']
        rocketlauncher = new_v['rocketlauncher']
        plasmarifle = new_v['plasmarifle']
        bfg9000 = new_v['bfg9000']

        # check game variables
        # a reported -1 is normalised to weapon 1 (fist) / 0 ammo
        if sel_weapon == -1:
            new_v['sel_weapon'] = 1
            sel_weapon = 1
        if sel_ammo == -1:
            new_v['sel_ammo'] = 0
            sel_ammo = 0
        assert sel_weapon in range(1, 8), sel_weapon
        assert sel_ammo >= 0, sel_ammo
        assert all(x in [0, 1] for x in [
            fist, pistol, shotgun, chaingun, rocketlauncher, plasmarifle,
            bfg9000
        ])
        assert 0 <= health <= 200 or health < 0 and self.game.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        assert 0 <= bullets <= 200 and 0 <= shells <= 50
        assert 0 <= rockets <= 50 and 0 <= cells <= 300

        # the selected weapon must be owned and its ammo must match the
        # corresponding ammo counter
        # fist
        if sel_weapon == 1:
            assert sel_ammo == 0
        # pistol
        elif sel_weapon == 2:
            assert pistol and sel_ammo == bullets
        # shotgun
        elif sel_weapon == 3:
            assert shotgun and sel_ammo == shells
        # chaingun
        elif sel_weapon == 4:
            assert chaingun and sel_ammo == bullets
        # rocket launcher
        elif sel_weapon == 5:
            assert rocketlauncher and sel_ammo == rockets
        # plasma rifle
        elif sel_weapon == 6:
            assert plasmarifle and sel_ammo == cells
        # BFG9000
        elif sel_weapon == 7:
            assert bfg9000 and sel_ammo == cells

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_statistics(self, action):
        """
        Update the statistics of the current map from the difference
        between the previous and the current properties.

        `action` is the ViZDoom-format action that was just executed; it is
        indexed through `self.mapping` to find out which buttons were
        pressed.
        """
        stats = self.statistics[self.map_id]

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        # distance (only counted when moving forward without turning)
        moving_forward = action[self.mapping['MOVE_FORWARD']]
        turn_left = action[self.mapping['TURN_LEFT']]
        turn_right = action[self.mapping['TURN_RIGHT']]
        if moving_forward and not (turn_left or turn_right):
            diff_x = self.properties['position_x'] - self.prev_properties[
                'position_x']
            diff_y = self.properties['position_y'] - self.prev_properties[
                'position_y']
            distance = math.sqrt(diff_x**2 + diff_y**2)
            stats['distance'] += distance

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            stats['deaths'] += 1

        # suicide (the frag count went down)
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            stats['suicides'] += 1

        # found / lost health (only gains are counted)
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                stats['medikits'] += 1

        # found / lost armor (only gains are counted)
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                stats['armors'] += 1

        # found weapon (any change in the ownership flag is counted)
        for i, weapon in enumerate([
                'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
                'plasmarifle', 'bfg9000'
        ]):
            if self.prev_properties[weapon] == self.properties[weapon]:
                continue
            stats[weapon] += 1

        # found / lost ammo (only gains are counted)
        for ammo in ['bullets', 'shells', 'rockets', 'cells']:
            d = self.properties[ammo] - self.prev_properties[ammo]
            if d != 0:
                if d > 0:
                    stats[ammo] += 1

    def start(self, map_id, episode_time=None, manual_control=False):
        """
        Start the game on map `map_id` (must be > 0).

        If `episode_time` is given, the episode timeout is set to
        `35 * episode_time` tics. `manual_control` enables the
        scripted override in `make_action`.

        Creates a fresh `DoomGame`, resets the statistics of `map_id`,
        applies the whole configuration, calls `init()` and finally
        `initialize_game()`.
        """
        assert type(manual_control) is bool
        self.manual_control = manual_control

        # Save statistics for this map
        self.statistics[map_id] = {k: 0 for k in self.stat_keys}

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map("map%02i" % map_id)

        # time limit
        if episode_time is not None:
            self.game.set_episode_timeout(int(35 * episode_time))

        # game parameters
        args = []

        # host / server
        args.append('-host 1')

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer
                                            or self.use_game_features)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # respawn invincibility / distance
        # players will be invulnerable for two second after spawning
        # players will be spawned as far as possible from any other players
        args.append('+sv_respawnprotect %i' % self.respawn_protect)
        args.append('+sv_spawnfarthest %i' % self.spawn_farthest)

        # freelook / agent name / agent color
        args.append('+freelook %i' % (1 if self.freelook else 0))
        args.append('+name %s' % self.name)
        args.append('+colorset %i' % self.color)

        # enable the cheat system (so that we can still
        # send commands to the game in self-play mode)
        args.append('+sv_cheats 1')

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # available buttons; `mapping` is what make_action / update_statistics
        # index actions with (button name -> position, presumably -- see
        # add_buttons)
        self.mapping = add_buttons(self.game,
                                   self.action_builder.available_buttons)

        # doom skill
        self.game.set_doom_skill(self.doom_skill + 1)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()

    def update_bots(self):
        """
        Add built-in AI bots.
        There are two types of AI: built-in AI and ScriptedMarines.

        With scripted marines a single ACS command sets their number;
        otherwise all bots are removed and `n_bots` are added again.
        """
        # only the host takes care of the bots
        if self.use_scripted_marines:
            command = "pukename set_value always 2 %i" % self.n_bots
            self.game.send_game_command(command)
        else:
            self.game.send_game_command("removebots")
            for _ in range(self.n_bots):
                self.game.send_game_command("addbot")

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state
        (player dead or episode finished).
        """
        return self.is_player_dead() or self.is_episode_finished()

    def new_episode(self):
        """
        Start a new episode. Only valid once the episode is finished or
        the player is dead.
        """
        assert self.is_episode_finished() or self.is_player_dead()
        self.game.new_episode()
        self.initialize_game()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.initialize_game()

    def initialize_game(self):
        """
        Initialize the game after the player spawns / respawns.
        Be sure that properties from the previous life are not considered
        in this one.
        """
        # generate buffers
        game_state = self.game.get_state()
        self._screen_buffer = game_state.screen_buffer
        self._depth_buffer = game_state.depth_buffer
        self._labels_buffer = game_state.labels_buffer
        self._labels = game_state.labels

        # actor properties
        self.prev_properties = None
        self.properties = None

        # advance a few steps to avoid bugs due to initial weapon changes in ACS
        self.game.advance_action(SKIP_INITIAL_ACTIONS)
        self.update_game_variables()

        # if there are bots in the game, and if this is a new game
        self.update_bots()

    def randomize_textures(self, randomize):
        """
        Randomize the textures of the map.
        """
        assert type(randomize) is bool
        randomize = 1 if randomize else 0
        self.game.send_game_command("pukename set_value always 4 %i" %
                                    randomize)

    def init_bots_health(self, health):
        """
        Initial bots health. Must be in (0, 100]; values other than 100
        are accepted only with scripted marines.
        """
        assert self.use_scripted_marines or health == 100
        assert 0 < health <= 100
        self.game.send_game_command("pukename set_value always 5 %i" % health)

    def make_action(self, action, frame_skip=1, sleep=None):
        """
        Make an action.

        `action` is converted by the action builder, possibly extended
        with a weapon switch, and repeated for `frame_skip` frames.
        If `sleep` is given and the window is visible, wait `sleep`
        seconds between frames. Afterwards the buffers, the game variables
        and the statistics are refreshed.
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # select agent favorite weapon: take the first owned weapon of
        # WEAPONS_PREFERENCES that has enough ammo and, if it is not
        # already selected, extend the action so that its SELECT_WEAPON
        # button is pressed
        for weapon_name, weapon_ammo, weapon_id in WEAPONS_PREFERENCES:
            min_ammo = 40 if weapon_name == 'bfg9000' else 1
            if self.properties[weapon_name] > 0 and self.properties[
                    weapon_ammo] >= min_ammo:
                if self.properties['sel_weapon'] != weapon_id:
                    switch_action = (
                        [False] *
                        self.mapping['SELECT_WEAPON%i' % weapon_id]) + [True]
                    action = action + switch_action[len(action):]
                break

        if action[self.mapping['MOVE_FORWARD']]:
            self.count_non_forward_actions = 0
        else:
            self.count_non_forward_actions += 1

        if action[self.mapping['TURN_LEFT']] or action[
                self.mapping['TURN_RIGHT']]:
            self.count_non_turn_actions = 0
        else:
            self.count_non_turn_actions += 1

        # manual control: after 30 actions without moving forward or 60
        # without turning, replace the action by a scripted
        # turn-right + speed (+ forward) repeated 40 times
        if self.manual_control and (self.count_non_forward_actions >= 30
                                    or self.count_non_turn_actions >= 60):
            manual_action = [False] * len(action)
            manual_action[self.mapping['TURN_RIGHT']] = True
            manual_action[self.mapping['SPEED']] = True
            if self.count_non_forward_actions >= 30:
                manual_action[self.mapping['MOVE_FORWARD']] = True
            manual_repeat = 40
            self.count_non_forward_actions = 0
            self.count_non_turn_actions = 0
        else:
            manual_action = None

        # if we are visualizing the experiment, show all the frames one by one
        if self.visible:
            if manual_action is not None:
                for _ in range(manual_repeat):
                    self.game.make_action(manual_action)
            else:
                for _ in range(frame_skip):
                    self.game.make_action(action)
                    # death or episode finished
                    if self.is_player_dead() or self.is_episode_finished():
                        break
                    # sleep for smooth visualization
                    if sleep is not None:
                        time.sleep(sleep)
        else:
            if manual_action is not None:
                self.game.make_action(manual_action, manual_repeat)
            else:
                self.game.make_action(action, frame_skip)

        # generate buffers (kept from the previous step when there is no state)
        game_state = self.game.get_state()
        if game_state is not None:
            self._screen_buffer = game_state.screen_buffer
            self._depth_buffer = game_state.depth_buffer
            self._labels_buffer = game_state.labels_buffer
            self._labels = game_state.labels

        # update game variables / statistics rewards
        self.update_game_variables()
        self.update_statistics(action)

    def close(self):
        """
        Close the current game.
        """
        self.game.close()

    def print_statistics(self, eval_time=None):
        """
        Print agent statistics for the current map.

        Exactly one map must have statistics. The derived 'frags' and
        'k/d' entries are computed and stored before printing.
        NOTE(review): `eval_time` is accepted but never used.
        """
        map_ids = self.statistics.keys()
        assert len(map_ids) == 1
        for v in self.statistics.values():
            assert set(self.stat_keys) == set(v.keys())

        # number of frags (kills - suicides)
        # 100% accurate if the number of frags is given by 'FRAGCOUNT'
        # almost 100% accurate if it is based on an internal ACS variable
        for v in self.statistics.values():
            v['frags'] = v['kills'] - v['suicides']

        # Kills / Deaths
        # 100% accurate if the number of kills is given by an ACS variable
        # almost 100% accurate if it is based on 'FRAGCOUNT'
        for v in self.statistics.values():
            v['k/d'] = v['kills'] * 1.0 / max(1, v['deaths'])

        print("******************Game statistics summary********************")
        print("Map%02d" % self.map_id)
        # NOTE: '%d' truncates the float-valued 'distance' and 'k/d'
        for item in self.stat_keys:
            print(item + ":\t%d" % self.statistics[self.map_id][item])

    def observe_state(self, params, last_states):
        """
        Observe the current state of the game.

        Appends a new `GameState` to `last_states` (mutated in place) and
        keeps it at `params.hist_size` entries: the very first state is
        replicated to fill the history, afterwards the oldest one is
        dropped. Returns `(screen, game_features)`.
        """
        # read game state
        screen, game_features = process_buffers(self, params)
        variables = [self.properties[x[0]] for x in params.game_variables]
        last_states.append(GameState(screen, variables, game_features))

        # update most recent states
        if len(last_states) == 1:
            last_states.extend([last_states[0]] * (params.hist_size - 1))
        else:
            assert len(last_states) == params.hist_size + 1
            del last_states[0]

        # return the screen and the game features
        return screen, game_features
class DoomEnv(gym.Env, EzPickle):
    """Gym environment around ViZDoom with 'ram' or 'image' observations.

    'ram' observations are the level's game variables; 'image'
    observations are the screen buffer in height x width x channel order.
    Level settings are read from ``Config.DOOM_SETTINGS``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    # Button pairs that cancel each other out when pressed together.
    _CONFLICTING_BUTTONS = (
        ('MOVE_LEFT', 'MOVE_RIGHT'),
        ('MOVE_BACKWARD', 'MOVE_FORWARD'),
        ('TURN_RIGHT', 'TURN_LEFT'),
        ('SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON'),
    )

    def __init__(self, level='deathmatch', obs_type='ram'):
        """Configure (but do not start) the game.

        Args:
            level: level name; anything after the first '.' is dropped.
            obs_type: 'ram' or 'image'.
        """
        # super(DoomEnv, self).__init__()
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)
        self.curr_seed = 0
        self.game = DoomGame()
        self.lock = (DoomLock()).get_lock()
        self.level = level
        self.obs_type = obs_type
        self.tick = 4  # tics each action is repeated for in step()
        self._mode = 'algo'
        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False
        self.viewer = None
        self.set_game(self.level, resolution=None, render=True)
        print()

    # todo: add frame skip option by using tick
    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``.

        In 'algo' mode the action is passed to ``DoomGame.make_action``,
        repeated for ``self.tick`` tics when ``tick`` is non-zero. In any
        other mode no action is sent and the reward is 0.0.
        """
        reward = 0.0
        # self.tick = 4
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)
                # self.game.set_action(action)
                # self.game.advance_action(4)
                # reward = self.game.get_last_reward()
        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        """Initialise the game on first use, start a new episode, return the first observation."""
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()
        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        """Return the frame (``'rgb_array'``) or show it in a viewer (``'human'``).

        ``close=True`` in ``kwargs`` closes the viewer instead.
        """
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':  # was `is`: identity test on a str literal
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        """Close the game while holding the lock."""
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        """Store a 32-bit seed derived from ``seed``; applied at the next episode start."""
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # ================================== GETTERS SETTERS ===============================================================
    def set_game(self, level, resolution, render):
        """Configure level, resolution, spaces and player mode on ``self.game``."""
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        """Re-seed and optionally replace the lock."""
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        """Apply the settings of ``level`` (or the current level) to the game.

        Does nothing when the level is already loaded and no new level is
        given. A running game is closed and replaced by a fresh
        ``DoomGame`` first.
        """
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False
        if self.is_level_loaded:
            return
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()
        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        settings = Config.DOOM_SETTINGS[self.level]
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              settings[Config.CONFIG])
        self.game.set_doom_scenario_path(Config.VIZDOOM_SCENARIO_PATH +
                                         settings[Config.SCENARIO])
        if settings[Config.MAP] != '':
            self.game.set_doom_map(settings[Config.MAP])
        self.game.set_doom_skill(settings[Config.DIFFICULTY])
        self.allowed_actions = settings[Config.ACTIONS]
        self.available_game_variables = settings[Config.GAME_VARIABLES]
        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        """Set the screen resolution from a name such as ``'RES_160X120'`` or ``'160x120'``.

        Raises:
            gym.error.Error: if the (lower-cased) name is not in ``resolutions``.
        """
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            # The two literals used to be joined without a space
            # ("validresolutions").
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by Vizdoom.\n The list of valid '
                'resolutions: {}'.format(resolution, resolutions))
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution,
                    'RES_{}X{}'.format(self.scr_width, self.scr_height)))
        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        """Build the observation/action spaces and the dummy observations.

        Raises:
            error.Error: if ``obs_type`` is neither 'ram' nor 'image'.
        """
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0,
                high=255,
                dtype=np.uint8,
                shape=(len(self.available_game_variables), ))
        elif self.obs_type == 'image':
            # self.observation_space = self.screen_resized
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.scr_height,
                                                       self.scr_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self.obs_type))

        # Placeholder frame in the buffer's native layout; get_image()
        # still runs it through invert_screen().
        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(shape=(3, self.scr_height,
                                                self.scr_width),
                                         dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(shape=(self.scr_height,
                                                self.scr_width, 3),
                                         dtype=np.uint8)
        self.dummy_ram = [0] * len(self.available_game_variables)

        # Every on/off combination of the available buttons.
        self.available_action_codes = [
            list(a)
            for a in it.product([0, 1],
                                repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete(
            [len(self.available_action_codes)])

    def __set_player(self, render=True):
        """Put the game in PLAYER mode with the window shown iff ``render``."""
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        """Initialise the game under the lock.

        Raises:
            error.Error: if ViZDoom exits or errors during ``init()``.
        """
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        """Start a new episode, applying a pending seed and recording if a path is set."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()
        return

    def getState(self):
        """Return the current ViZDoom game state."""
        return self.game.get_state()

    def getLastAction(self):
        """Return the last action executed by the game."""
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        """Translate ``action`` into button names via ``action_to_buttons``."""
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        """Return a dict with the level, the rounded total reward and every game variable."""
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }
        state_variables = self.get_ram()
        for idx, variable_name in enumerate(self.available_game_variables):
            info[variable_name] = state_variables[idx]
        return info

    def get_ram(self):
        """Return the current game variables, or ``dummy_ram`` when there is no state.

        Raises:
            NotImplementedError: if the game has not been initialised.
        """
        if not self.is_game_initialized:
            raise NotImplementedError(
                "The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            # getState() returned None
            ram = self.dummy_ram
        return ram

    def get_image(self):
        """Return a copy of the screen in H x W x C order (dummy frame when there is no state)."""
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            # getState() returned None
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        """Return the observation matching ``obs_type``."""
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        """True when the episode is finished, the player is dead, or there is no state."""
        return (self.game.is_episode_finished()
                or self.game.is_player_dead()
                or self.getState() is None)

    # ===========================================  ==============================================================
    def invert_screen(self, img):
        """Move the channel axis last for channel-first screen formats."""
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        """Drop action codes that press two mutually exclusive buttons.

        No-op in 'human' mode. Order of the remaining codes is preserved.
        """
        if self._mode == 'human':
            return
        print("Initial actions size: " + str(len(self.available_action_codes)))
        kept_codes = []
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)
            has_conflict = any(
                all(button in ac_names for button in pair)
                for pair in self._CONFLICTING_BUTTONS)
            if not has_conflict:
                kept_codes.append(action)
        print("Final actions size: " + str(len(kept_codes)))
        self.available_action_codes = kept_codes

    def __initHumanPlayer(self):
        """Switch to spectator mode with a visible window and initialise the game."""
        self._mode = 'human'
        self.__load_level()
        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False
        self.__init_game()

    def advanceAction(self, tick=0):
        """Advance the game by ``tick`` tics (one when 0); False if ViZDoom exited."""
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        """Let a human play until the episode ends or the player dies, printing per-frame stats."""
        self.__initHumanPlayer()
        while (not self.game.is_episode_finished()
               and not self.game.is_player_dead()):
            self.advanceAction()
            state = self.getState()
            if state is None:
                # Same truthiness test as __start_episode, so an empty
                # path is not passed to new_episode().
                if self.record_file_path:
                    self.game.new_episode(self.record_file_path)
                else:
                    self.game.new_episode()
                state = self.getState()
            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return
class DoomEnvironment:
    """Wrapper around a ViZDoom ``DoomGame`` that serves 84x84 grayscale frames.

    Actions are integer indices; each index presses exactly one of the
    available buttons.
    """

    def __init__(self, scenario, path_to_config="doom/config"):
        """Load ``<path_to_config>/<scenario>.cfg`` / ``.wad`` and start the game hidden."""
        scenario_base = path_to_config + "/" + scenario
        self.game = DoomGame()
        game = self.game
        game.load_config(scenario_base + ".cfg")
        game.set_doom_scenario_path(scenario_base + ".wad")
        game.set_window_visible(False)
        game.init()
        self.num_actions = len(game.get_available_buttons())

    def reset(self):
        """Start a new episode and return its first observation."""
        self.game.new_episode()
        state = self.game.get_state()
        frame = state.screen_buffer
        # Axes 1 and 2 of the raw buffer are stored as height and width.
        self.h, self.w = frame.shape[1:3]
        self.current_obs = self.preprocess_obs(frame)
        # Only unpacked when the game exposes exactly two game variables.
        if self.game.get_available_game_variables_size() == 2:
            self.ammo, self.health = state.game_variables
        return self.get_obs()

    def get_obs(self):
        """Return the current preprocessed frame with a trailing axis appended."""
        return self.current_obs[:, :, None]

    def get_obs_rgb(self):
        """Return the current raw screen buffer as an ``(h, w, 3)`` uint8 array."""
        channels_last = np.rollaxis(self.game.get_state().screen_buffer, 0, 3)
        return np.reshape(channels_last, [self.h, self.w, 3]).astype(np.uint8)

    def preprocess_obs(self, obs):
        """Convert a raw screen buffer into an 84x84 uint8 grayscale image."""
        channels_last = np.rollaxis(obs, 0, 3)
        pixels = np.reshape(channels_last, [self.h, self.w, 3]).astype(np.float32)
        # Weighted sum of the three channels.
        gray = (pixels[:, :, 0] * 0.299 + pixels[:, :, 1] * 0.587 +
                pixels[:, :, 2] * 0.114)
        resized = Image.fromarray(gray).resize((84, 84), Image.BILINEAR)
        return np.array(resized).astype(np.uint8)

    def action_to_doom(self, a):
        """Turn action index ``a`` into a one-hot button list of length ``num_actions``."""
        buttons = [0] * self.num_actions
        buttons[int(a)] = 1
        return buttons

    def step(self, a):
        """Play action ``a`` and return ``(observation, reward, done)``.

        When the episode has finished, the observation is an all-zero frame.
        """
        reward = self.game.make_action(self.action_to_doom(a))
        done = self.game.is_episode_finished()
        if done:
            frame = np.zeros_like(self.current_obs, dtype=np.uint8)
        else:
            frame = self.preprocess_obs(self.game.get_state().screen_buffer)
        self.current_obs = frame
        return self.get_obs(), reward, done

    def watch_random_play(self, max_ep_length=1000, frame_skip=4):
        """Play uniformly random actions, displaying every ``frame_skip``-th frame."""
        self.reset()
        for step_idx in range(max_ep_length):
            random_action = np.random.randint(self.num_actions)
            _, _, done = self.step(random_action)
            if done:
                break
            rgb = self.get_obs_rgb()
            if step_idx % frame_skip != 0:
                continue
            plt.imshow(rgb)
            display.clear_output(wait=True)
            display.display(plt.gcf())
class Experiment(object):
    """
    Drive a ViZDoom game on behalf of an agent.

    Main methods:
        - start: configure and launch the game on a given map
        - make_action: play an action, return the reward and the next state
        - reset / new_episode / respawn_player: episode life-cycle
        - close: shut the game down
    """

    def __init__(self,
                 scenario,
                 action_builder,
                 reward_builder,
                 logger,
                 living_reward=0,
                 custom_reward=False,
                 score_variable='FRAGCOUNT',
                 game_features=None,
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 use_labels_buffer=True,
                 mode='PLAYER',
                 player_rank=0,
                 players_per_game=1,
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 name='Hubert_Bonnisseur_de_la_Bate',
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Validate and store the configuration of a new game.

        The game itself is only created by `start`.

        game_features: forwarded to `parse_game_features`; `None` (the
            default) is treated as an empty list
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats

        NOTE: `score_variable`, `player_rank` and `players_per_game` are only
        validated, and `freedoom`, `respawn_protect` and `spawn_farthest` are
        not read at all by this class (the game file is always
        'freedoom2.wad').
        """
        # A fresh list per call instead of a shared mutable default argument.
        if game_features is None:
            game_features = []

        # game resources
        game_filename = 'freedoom2.wad'
        self.scenario = scenario
        self.scenario_path = os.path.join(PATH,
                                          'scenarios/{}.wad'.format(scenario))
        self.game_path = os.path.join(PATH, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.game_features = parse_game_features(game_features, logger)
        self.use_labels_buffer = use_labels_buffer
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # window visibility
        self.visible = visible

        # actor reward
        # used for reward shaping (LSTM & Curiosity A3C)
        self.reward_builder = reward_builder
        self.living_reward = living_reward
        self.custom_reward = custom_reward

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill (ie difficulty of the game)
        self.doom_skill = doom_skill

        # bot name
        self.name = name

        # action builder
        self.action_builder = action_builder

        # save game statistics for each episode
        # (used for model comparison and reward shaping)
        self.stats = {}

        # use logging for DEBUG purpose
        self.logger = logger

    #==============================================================================
    # Game start
    #==============================================================================

    def start(self, map_id, episode_time=None, log_events=False):
        """
        Start the game on map `map_id` (must be > 0).

        If `episode_time` is given, it is passed unchanged to
        `set_episode_timeout`, so the game will end after the specified time.
        `log_events` is accepted but currently unused.
        """
        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map('map{:02d}'.format(map_id))

        # time limit
        if episode_time is not None:
            self.game.set_episode_timeout(episode_time)

        # Save statistics for this map
        self.stats[self.map_id] = []

        # Running statistics for the first episode. Without this,
        # `make_action` / `reset` raised AttributeError on `self.run_stats`
        # unless `new_episode` had been called explicitly beforehand.
        self.run_stats = {k: 0 for k in STAT_KEYS}

        # game parameters
        args = []

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # players will respawn automatically after they die
        # autoaim is disabled for all players
        # (the '-deathmatch' flag itself is intentionally not passed)
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # agent name
        args.append('+name %s' % self.name)

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # define available buttons
        self.action_builder.set_buttons(self.game)

        # doom skill (https://zdoom.org/wiki/GameSkill)
        self.game.set_doom_skill(self.doom_skill + 1)

        # define basic rewards
        self.game.set_living_reward(self.living_reward)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()
        self.logger.info('start_game')

    #==============================================================================
    # Game statistics
    #==============================================================================

    def _read_game_properties(self):
        """Read every variable in GAME_FEATURES, as int when integral else float."""
        raw = {
            k: self.game.get_game_variable(v)
            for k, v in GAME_FEATURES.items()
        }
        return {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in raw.items()
        }

    def update_game_properties(self):
        """
        Update game properties, keeping the previous ones in `prev_properties`.
        """
        new_v = self._read_game_properties()
        self.prev_properties = self.properties
        self.properties = new_v

    def update_game_statistics(self):
        """
        Compare current and previous properties, update the running stats dict
        and return the list of event names used to build custom rewards.
        """
        stats = self.run_stats

        # init r if custom rewards
        r = []

        # kill
        d = self.properties['kill_count'] - self.prev_properties['kill_count']
        if d > 0:
            r.extend(d * ['kill_count'])
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            r.append('dead')
            stats['deaths'] += 1

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            r.append('suicide')
            stats['suicides'] += 1

        # found health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                r.append('medikit')
                stats['medikit'] += 1
            stats['health'] = self.properties['health']

        # health lost
        d = self.properties['damage_count'] - self.prev_properties[
            'damage_count']
        if d > 0:
            r.append('health_lost')

        # found armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                r.append('armor')
                stats['armor'] += 1

        # found weapon (detected as a change of the selected weapon)
        if self.prev_properties['sel_weapon'] != self.properties['sel_weapon']:
            r.append('weapon')
            stats['found_weapon'] += 1

        # found / lost ammo (only meaningful if the weapon did not change)
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if self.prev_properties['sel_weapon'] == self.properties['sel_weapon']:
            if d != 0:
                if d > 0:
                    r.append('ammo')
                    stats['ammo'] += 1
                else:
                    r.append('use_ammo')

        # auxiliary stats not used for rewards
        stats['frag_count'] = self.properties['frag_count']

        return r

    def calculate_final_stats(self):
        """
        Calculate the final stats (kill / death ratio) from the running stats.
        """
        self.run_stats['k/d'] = self.run_stats['kills'] * 1.0 / max(
            1, self.run_stats['deaths'])

    #==============================================================================
    # Game handling
    #==============================================================================

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def reset(self):
        """
        Record the running stats, then reset the game if necessary.
        This can be because:
            - we reach the end of an episode (we restart the game)
            - because the agent is dead (we make it respawn)
        """
        self.stats[self.map_id].append(self.run_stats)

        # if the player is dead
        if self.is_player_dead():
            # respawn it (deathmatch mode)
            if self.episode_time is None:
                self.respawn_player()
            # or reset the episode (episode ends when the agent dies)
            else:
                self.new_episode()

        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.initialize_game()

    def new_episode(self):
        """
        Start a new episode with fresh running stats.
        """
        # init new stats for the episode
        self.run_stats = {k: 0 for k in STAT_KEYS}
        # init new game
        self.game.new_episode()
        # init episode properties
        self.initialize_game()

    def initialize_game(self):
        """
        Reset game properties: the current ones are re-read from the game and
        the previous ones are cleared.
        """
        new_v = self._read_game_properties()
        self.prev_properties = None
        self.properties = new_v

    def close(self):
        """
        Close the current experiment.
        """
        self.game.close()

    def observe_state(self, variable_names, feature_names):
        """
        Observe the current state of the game.

        Returns the `(screen, variables, game_features)` triple produced by
        `process_game_info`.
        """
        # read game state
        screen, variables, game_features = process_game_info(
            self.game, variable_names, feature_names)
        # return the screen and the game features
        return screen, variables, game_features

    def make_action(self,
                    action,
                    variable_names,
                    feature_names,
                    frame_skip=1,
                    sleep=None):
        """
        Process action and give the next state according to the game engine.

        Inputs :
            action : action, converted by the action builder
            variable_names, feature_names : forwarded to `observe_state`
            frame_skip : nb of frames during which the same action is performed
            sleep : pause game for `sleep` seconds between frames in order to
                smooth visualization (only when the window is visible)
        Output :
            reward defined in the game engine, plus the custom reward when
                `custom_reward` is set and a reward builder is available
            screen        |
            variables     | of the next state (None if final state)
            game_features |
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        if self.visible:
            # play the frames one by one so that each of them is rendered
            r = 0
            for _ in range(frame_skip):
                r += self.game.make_action(action)
                # death or episode finished
                if self.is_player_dead() or self.is_episode_finished():
                    break
                # sleep for smooth visualization
                if sleep is not None:
                    time.sleep(sleep)
        else:
            r = self.game.make_action(action, frame_skip)

        # observe resulting state
        if not self.is_final():
            screen, variables, game_features = self.observe_state(
                variable_names, feature_names)
        else:
            screen = None
            variables = None
            game_features = None

        # update game statistics and return custom rewards
        self.update_game_properties()
        list_r = self.update_game_statistics()
        r_bis = 0
        if self.custom_reward and self.reward_builder:
            r_bis = self.reward_builder.get_reward(list_r)

        return r + r_bis, screen, variables, game_features
class Game(object):
    """
    ViZDoom deathmatch game driven by an agent: configuration, episode
    life-cycle, per-map statistics and reward computation.
    """

    def __init__(self,
                 scenario,
                 action_builder,
                 reward_values=None,
                 score_variable='FRAGCOUNT',
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 labels_mapping='',
                 game_features='',
                 mode='PLAYER',
                 player_rank=0,
                 players_per_game=1,
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 freelook=False,
                 name='Arnold',
                 color=0,
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.
        score_variable: indicates in which game variable the user score is
            stored. by default it's in FRAGCOUNT, but the score in ACS against
            built-in AI bots can be stored in USER1, USER2, etc.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
               5 - light brown, 6 - light red, 7 - light blue
        """
        # game resources
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario_path = os.path.join(RESOURCES_DIR, 'scenarios',
                                          '%s.wad' % scenario)
        self.game_path = os.path.join(RESOURCES_DIR, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0 and color in range(8)
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # action builder
        self.action_builder = action_builder

        # add the score variable to the game variables list
        # NOTE(review): `game_variables` is not owned by this instance, so
        # every Game() created appends one more ('score', ...) entry to it.
        self.score_variable = score_variable
        game_variables.append(('score', getattr(GameVariable, score_variable)))

        self.player_rank = player_rank
        self.players_per_game = players_per_game

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.labels_mapping = parse_labels_mapping(labels_mapping)
        self.game_features = parse_game_features(game_features)
        self.use_labels_buffer = self.labels_mapping is not None
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # respawn invincibility / distance
        self.respawn_protect = respawn_protect
        self.spawn_farthest = spawn_farthest

        # freelook / agent name / agent color
        self.freelook = freelook
        self.name = name.strip()
        self.color = color

        # window visibility
        self.visible = visible

        # actor reward
        self.reward_builder = RewardBuilder(self, reward_values)

        # game statistics
        self.stat_keys = [
            'kills', 'deaths', 'suicides', 'frags', 'k/d', 'medikits',
            'armors', 'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
            'plasmarifle', 'bfg9000', 'bullets', 'shells', 'rockets', 'cells'
        ]
        self.statistics = {}

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill
        self.doom_skill = doom_skill

        # manual control
        self.count_non_forward_actions = 0
        self.count_non_turn_actions = 0

    def update_game_variables(self):
        """
        Check and update game variables.

        Reads every entry of `game_variables` from the game, sanity-checks the
        values, then shifts `properties` into `prev_properties` and stores the
        new values in `properties`.
        """
        # read game variables
        new_v = {k: self.game.get_game_variable(v) for k, v in game_variables}
        # only coordinates are allowed to be non-integral
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']
        sel_weapon = new_v['sel_weapon']
        sel_ammo = new_v['sel_ammo']
        bullets = new_v['bullets']
        shells = new_v['shells']
        rockets = new_v['rockets']
        cells = new_v['cells']
        fist = new_v['fist']
        pistol = new_v['pistol']
        shotgun = new_v['shotgun']
        chaingun = new_v['chaingun']
        rocketlauncher = new_v['rocketlauncher']
        plasmarifle = new_v['plasmarifle']
        bfg9000 = new_v['bfg9000']

        # check game variables
        # a value of -1 is replaced with the fist (weapon 1) / no ammo
        if sel_weapon == -1:
            logger.warning("SELECTED WEAPON is -1!")
            new_v['sel_weapon'] = 1
            sel_weapon = 1
        if sel_ammo == -1:
            logger.warning("SELECTED AMMO is -1!")
            new_v['sel_ammo'] = 0
            sel_ammo = 0
        assert sel_weapon in range(1, 8), sel_weapon
        assert sel_ammo >= 0, sel_ammo
        assert all(x in [0, 1] for x in [
            fist, pistol, shotgun, chaingun, rocketlauncher, plasmarifle,
            bfg9000
        ])
        assert 0 <= health <= 200 or health < 0 and self.game.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        assert 0 <= bullets <= 200 and 0 <= shells <= 50
        assert 0 <= rockets <= 50 and 0 <= cells <= 300

        # fist
        if sel_weapon == 1:
            assert sel_ammo == 0
        # pistol
        elif sel_weapon == 2:
            assert pistol and sel_ammo == bullets
        # shotgun
        elif sel_weapon == 3:
            assert shotgun and sel_ammo == shells
        # chaingun
        elif sel_weapon == 4:
            assert chaingun and sel_ammo == bullets
        # rocket launcher
        elif sel_weapon == 5:
            assert rocketlauncher and sel_ammo == rockets
        # plasma rifle
        elif sel_weapon == 6:
            assert plasmarifle and sel_ammo == cells
        # BFG9000
        elif sel_weapon == 7:
            assert bfg9000 and sel_ammo == cells

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_statistics_and_reward(self, action):
        """
        Update statistics of the current game based on the previous and the
        current properties, and create a reward.

        `action` is the button list sent to the game; it is only used to tell
        whether the agent was moving straight forward.
        """
        stats = self.statistics[self.map_id]

        # reset reward
        self.reward_builder.reset()

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        # distance (only rewarded when moving forward without turning)
        moving_forward = action[self.mapping['MOVE_FORWARD']]
        turn_left = action[self.mapping['TURN_LEFT']]
        turn_right = action[self.mapping['TURN_RIGHT']]
        if moving_forward and not (turn_left or turn_right):
            diff_x = self.properties['position_x'] - self.prev_properties[
                'position_x']
            diff_y = self.properties['position_y'] - self.prev_properties[
                'position_y']
            distance = math.sqrt(diff_x**2 + diff_y**2)
            self.reward_builder.distance(distance)

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.reward_builder.kill(d)
            stats['kills'] += d
            for _ in range(int(d)):
                self.log('Kill')

        # death
        if self.game.is_player_dead():
            self.reward_builder.death()
            stats['deaths'] += 1
            self.log('Dead')

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            self.reward_builder.suicide()
            stats['suicides'] += 1
            self.log('Suicide')

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                self.reward_builder.medikit(d)
                stats['medikits'] += 1
            else:
                self.reward_builder.injured(d)
            self.log('%s health (%i -> %i)' % (
                'Found' if d > 0 else 'Lost',
                self.prev_properties['health'],
                self.properties['health'],
            ))

        # found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                self.reward_builder.armor()
                stats['armors'] += 1
            self.log('%s armor (%i -> %i)' % (
                'Found' if d > 0 else 'Lost',
                self.prev_properties['armor'],
                self.properties['armor'],
            ))

        # change weapon
        if self.properties['sel_weapon'] != self.prev_properties['sel_weapon']:
            self.log('Switched weapon: %s -> %s' % (
                WEAPON_NAMES[self.prev_properties['sel_weapon']],
                WEAPON_NAMES[self.properties['sel_weapon']],
            ))

        # found weapon
        for i, weapon in enumerate([
                'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
                'plasmarifle', 'bfg9000'
        ]):
            if self.prev_properties[weapon] == self.properties[weapon]:
                continue
            # TODO check that the weapon flag went from 0 to 1
            self.reward_builder.weapon()
            stats[weapon] += 1
            self.log('Found weapon: %s' % WEAPON_NAMES[i + 1])

        # found / lost ammo
        for ammo in ['bullets', 'shells', 'rockets', 'cells']:
            d = self.properties[ammo] - self.prev_properties[ammo]
            if d != 0:
                if d > 0:
                    self.reward_builder.ammo()
                    stats[ammo] += 1
                else:
                    self.reward_builder.use_ammo()
                self.log('%s ammo: %s (%i -> %i)' %
                         ('Found' if d > 0 else 'Lost', ammo,
                          self.prev_properties[ammo], self.properties[ammo]))

    def log(self, message):
        """
        Log the game event.
        During training, we don't want to display events.
        """
        if self.log_events:
            logger.info(message)

    def start(self,
              map_id,
              episode_time=None,
              manual_control=False,
              log_events=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time
        (the timeout is set to `35 * episode_time`).
        """
        assert type(manual_control) is bool
        self.manual_control = manual_control

        # Save statistics for this map
        self.statistics[map_id] = {k: 0 for k in self.stat_keys}

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map("map%02i" % map_id)

        # time limit
        if episode_time is not None:
            self.game.set_episode_timeout(int(35 * episode_time))

        # log events that happen during the game (useful for testing)
        self.log_events = log_events

        # game parameters
        args = []

        # host / server
        if self.players_per_game > 1:
            port = 5092 + self.player_rank // self.players_per_game
            if self.player_rank % self.players_per_game == 0:
                args.append('-host %i -port %i' %
                            (self.players_per_game, port))
            else:
                args.append('-join 127.0.0.1:%i' % port)
        else:
            args.append('-host 1')

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer
                                            or self.use_game_features)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # respawn invincibility / distance
        # players will be invulnerable for two second after spawning
        # players will be spawned as far as possible from any other players
        args.append('+sv_respawnprotect %i' % self.respawn_protect)
        args.append('+sv_spawnfarthest %i' % self.spawn_farthest)

        # freelook / agent name / agent color
        args.append('+freelook %i' % (1 if self.freelook else 0))
        args.append('+name %s' % self.name)
        args.append('+colorset %i' % self.color)

        # enable the cheat system (so that we can still
        # send commands to the game in self-play mode)
        args.append('+sv_cheats 1')

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # available buttons
        self.mapping = add_buttons(self.game,
                                   self.action_builder.available_buttons)

        # doom skill (https://zdoom.org/wiki/GameSkill)
        self.game.set_doom_skill(self.doom_skill + 1)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()

    def reset(self):
        """
        Reset the game if necessary. This can be because:
            - we reach the end of an episode (we restart the game)
            - because the agent is dead (we make it respawn)
        """
        self.count_non_forward_actions = 0

        # if the player is dead
        if self.is_player_dead():
            # respawn it (deathmatch mode)
            if self.episode_time is None:
                self.respawn_player()
            # or reset the episode (episode ends when the agent dies)
            else:
                self.new_episode()

        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()

        # deal with a ViZDoom issue
        while self.is_player_dead():
            # The rank lives on the game itself; this class has no `params`
            # attribute, so the previous `self.params.player_rank` raised
            # AttributeError exactly when this warning was needed.
            logger.warning('Player %i is still dead after respawn.' %
                           self.player_rank)
            self.respawn_player()

    def update_bots(self):
        """
        Add built-in AI bots.
        There are two types of AI: built-in AI and ScriptedMarines.
        """
        # only the host takes care of the bots
        if self.player_rank % self.players_per_game != 0:
            return
        if self.use_scripted_marines:
            command = "pukename set_value always 2 %i" % self.n_bots
            self.game.send_game_command(command)
        else:
            self.game.send_game_command("removebots")
            for _ in range(self.n_bots):
                self.game.send_game_command("addbot")

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def new_episode(self):
        """
        Start a new episode.
        """
        assert self.is_episode_finished() or self.is_player_dead()
        self.game.new_episode()
        self.log('New episode')
        self.initialize_game()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.log('Respawn player')
        self.initialize_game()

    def initialize_game(self):
        """
        Initialize the game after the player spawns / respawns.
        Be sure that properties from the previous life are not considered in
        this one.
        """
        # generate buffers
        game_state = self.game.get_state()
        self._screen_buffer = game_state.screen_buffer
        self._depth_buffer = game_state.depth_buffer
        self._labels_buffer = game_state.labels_buffer
        self._labels = game_state.labels

        # actor properties
        self.prev_properties = None
        self.properties = None

        # advance a few steps to avoid bugs due
        # to initial weapon changes in ACS
        self.game.advance_action(SKIP_INITIAL_ACTIONS)
        self.update_game_variables()

        # if there are bots in the game, and if this is a new game
        self.update_bots()

    def randomize_textures(self, randomize):
        """
        Randomize the textures of the map.
        """
        assert type(randomize) is bool
        randomize = 1 if randomize else 0
        self.game.send_game_command("pukename set_value always 4 %i" %
                                    randomize)

    def init_bots_health(self, health):
        """
        Initial bots health.
        """
        assert self.use_scripted_marines or health == 100
        assert 0 < health <= 100
        self.game.send_game_command("pukename set_value always 5 %i" % health)

    def make_action(self, action, frame_skip=1, sleep=None):
        """
        Make an action.
        If `sleep` is given, the network will wait
        `sleep` seconds between each action.
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # select agent favorite weapon: the first preference that is owned
        # and has enough ammo wins; switch to it if it is not selected yet
        for weapon_name, weapon_ammo, weapon_id in WEAPONS_PREFERENCES:
            min_ammo = 40 if weapon_name == 'bfg9000' else 1
            if self.properties[weapon_name] > 0 and self.properties[
                    weapon_ammo] >= min_ammo:
                if self.properties['sel_weapon'] != weapon_id:
                    # pad the action up to the weapon button and press it
                    switch_action = (
                        [False] *
                        self.mapping['SELECT_WEAPON%i' % weapon_id]) + [True]
                    action = action + switch_action[len(action):]
                    self.log("Manual weapon change: %s -> %s" %
                             (WEAPON_NAMES[self.properties['sel_weapon']],
                              weapon_name))
                break

        if action[self.mapping['MOVE_FORWARD']]:
            self.count_non_forward_actions = 0
        else:
            self.count_non_forward_actions += 1

        if action[self.mapping['TURN_LEFT']] or action[
                self.mapping['TURN_RIGHT']]:
            self.count_non_turn_actions = 0
        else:
            self.count_non_turn_actions += 1

        # take over when the agent has not moved forward / turned for a while
        if self.manual_control and (self.count_non_forward_actions >= 30
                                    or self.count_non_turn_actions >= 60):
            manual_action = [False] * len(action)
            manual_action[self.mapping['TURN_RIGHT']] = True
            manual_action[self.mapping['SPEED']] = True
            if self.count_non_forward_actions >= 30:
                manual_action[self.mapping['MOVE_FORWARD']] = True
            manual_repeat = 40
            self.count_non_forward_actions = 0
            self.count_non_turn_actions = 0
        else:
            manual_action = None

        # if we are visualizing the experiment, show all the frames one by one
        if self.visible:
            if manual_action is not None:
                logger.warning('Activated manual control')
                for _ in range(manual_repeat):
                    self.game.make_action(manual_action)
            else:
                for _ in range(frame_skip):
                    self.game.make_action(action)
                    # death or episode finished
                    if self.is_player_dead() or self.is_episode_finished():
                        break
                    # sleep for smooth visualization
                    if sleep is not None:
                        time.sleep(sleep)
        else:
            if manual_action is not None:
                logger.warning('Activated manual control')
                self.game.make_action(manual_action, manual_repeat)
            else:
                self.game.make_action(action, frame_skip)

        # generate buffers (the previous ones are kept if there is no state)
        game_state = self.game.get_state()
        if game_state is not None:
            self._screen_buffer = game_state.screen_buffer
            self._depth_buffer = game_state.depth_buffer
            self._labels_buffer = game_state.labels_buffer
            self._labels = game_state.labels

        # update game variables / statistics rewards
        self.update_game_variables()
        self.update_statistics_and_reward(action)

    @property
    def reward(self):
        """
        Return the reward value.
        """
        return self.reward_builder.reward

    def close(self):
        """
        Close the current game.
        """
        self.game.close()

    def print_statistics(self, eval_time=None):
        """
        Print agent statistics for every map, with a summary over all maps
        when there is more than one map.
        If `eval_time` (in seconds, a multiple of 60) is given, the number of
        frags per minute is reported as well.
        """
        if 'all' in self.statistics:
            del self.statistics['all']
        map_ids = sorted(self.statistics.keys())
        if len(map_ids) == 0:
            logger.info("No statistics to show!")
            return
        for v in self.statistics.values():
            assert set(self.stat_keys) == set(v.keys())

        # sum the results on all maps for global statistics
        self.statistics['all'] = {
            k: sum(v[k] for v in self.statistics.values())
            for k in self.stat_keys
        }

        # number of frags (kills - suicides)
        # 100% accurate if the number of frags is given by 'FRAGCOUNT'
        # almost 100% accurate if it is based on an internal ACS variable
        for v in self.statistics.values():
            v['frags'] = v['kills'] - v['suicides']

        # number of frags per minutes (with and without respawn time)
        if eval_time is not None:
            assert eval_time % 60 == 0
            for k, v in self.statistics.items():
                eval_minutes = eval_time / 60
                if k == 'all':
                    # the summary covers every map ('all' itself excluded)
                    eval_minutes *= (len(self.statistics) - 1)
                respawn_time = (v['deaths'] * RESPAWN_SECONDS * 1.0 / 60)
                v['frags_pm'] = v['frags'] * 1.0 / eval_minutes
                v['frags_pm_r'] = v['frags'] * 1.0 / (eval_minutes +
                                                      respawn_time)

        # Kills / Deaths
        # 100% accurate if the number of kills is given by an ACS variable
        # almost 100% accurate if it is based on 'FRAGCOUNT'
        for v in self.statistics.values():
            v['k/d'] = v['kills'] * 1.0 / max(1, v['deaths'])

        # statistics to log
        log_lines = [
            [''] + ['Map%02i' % i for i in map_ids] + ['All'],
            ('Kills', 'kills'),
            ('Deaths', 'deaths'),
            ('Suicides', 'suicides'),
            ('Frags', 'frags'),
            ('Frags/m', 'frags_pm'),
            ('Frags/m (r)', 'frags_pm_r'),
            ('K/D', 'k/d'),
            None,
            ('Medikits', 'medikits'),
            ('Armors', 'armors'),
            ('SuperShotgun', 'shotgun'),
            ('Chaingun', 'chaingun'),
            ('RocketLauncher', 'rocketlauncher'),
            ('PlasmaRifle', 'plasmarifle'),
            ('BFG9000', 'bfg9000'),
            ('Bullets', 'bullets'),
            ('Shells', 'shells'),
            ('Rockets', 'rockets'),
            ('Cells', 'cells'),
        ]

        # only show statistics on all maps if there is more than one map
        if len(map_ids) > 1:
            map_ids.append('all')

        logger.info('*************** Game statistics summary ***************')
        log_pattern = '{: >15}' + ('{: >8}' * len(map_ids))
        for line in log_lines:
            if line is None:
                logger.info('')
            else:
                if type(line) is tuple:
                    assert len(line) == 2
                    name, k = line
                    if k in ['frags_pm', 'frags_pm_r'] and eval_time is None:
                        continue
                    line = ['%s:' % name]
                    line += [self.statistics[map_id][k] for map_id in map_ids]
                else:
                    assert type(line) is list
                    line = line[:len(map_ids) + 1]
                line = ['%.3f' % x if type(x) is float else x for x in line]
                logger.info(log_pattern.format(*line))

    def observe_state(self, params, last_states):
        """
        Observe the current state of the game.

        Appends the new state to `last_states` (in place), keeping it at
        `params.hist_size` entries, and returns `(screen, game_features)`.
        """
        # read game state
        screen, game_features = process_buffers(self, params)
        variables = [self.properties[x[0]] for x in params.game_variables]
        last_states.append(GameState(screen, variables, game_features))

        # update most recent states
        if len(last_states) == 1:
            # first observation: fill the history with copies of it
            last_states.extend([last_states[0]] * (params.hist_size - 1))
        else:
            assert len(last_states) == params.hist_size + 1
            del last_states[0]

        # return the screen and the game features
        return screen, game_features
class DoomEnvironment:
    """Thin wrapper around a ViZDoom ``DoomGame`` with an indexed action set.

    Every combination of pressed/released buttons is enumerated once in
    ``__init__``; callers then address an action by its index in that list.
    """

    def __init__(self, config, visible, skiprate):
        """Load ``config``, start the game and enumerate button combinations.

        :param config: path of the ViZDoom ``.cfg`` file to load
        :param visible: passed to ``set_window_visible``
        :param skiprate: number of tics an action is repeated for
        """
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_window_visible(visible)
        self._game.set_mode(Mode.PLAYER)
        self._game.init()

        n_actions = self._game.get_available_buttons_size()
        # One entry per 0/1 assignment of the available buttons.
        self._actions = [list(a) for a in it.product([0, 1], repeat=n_actions)]
        self._skiprate = skiprate

    def make_visible(self):
        """Restart the game with a visible window in ``ASYNC_PLAYER`` mode."""
        self._game.close()
        self._game.set_window_visible(True)
        self._game.set_mode(Mode.ASYNC_PLAYER)
        self._game.init()

    def get_n_buttons(self):
        """Return the number of buttons available in the loaded config."""
        return self._game.get_available_buttons_size()

    def observe(self):
        """Return the current ``(screen_buffer, game_variables)`` pair.

        :return: ``(screen, game_variables)``, or ``(None, None)`` when the
            game reports no state (``get_state()`` returned ``None``)
        """
        state = self._game.get_state()
        if state is None:
            # Previously this raised AttributeError on ``None.screen_buffer``.
            return None, None
        return state.screen_buffer, state.game_variables

    def step(self, action_id):
        """Perform one indexed action for ``skiprate`` tics in a single call.

        :param action_id: index of action to perform
        :return: ``(reward, is_done)``
        """
        reward = self._game.make_action(self._actions[action_id],
                                        self._skiprate)
        return reward, self._game.is_episode_finished()

    def advance_action_step(self, action_id):
        """Perform one indexed action tic by tic, up to ``skiprate`` times.

        Unlike :meth:`step`, the action is issued once per tic so every
        frame is rendered.

        :param action_id: index of action to perform
        :return: ``(reward, is_done)`` where ``reward`` is the sum over the
            tics actually played
        """
        reward = 0.0
        for _ in range(self._skiprate):
            reward += self._game.make_action(self._actions[action_id])
            # it is vital to break if done for correct reward shaping
            if self._game.is_episode_finished():
                break
        return reward, self._game.is_episode_finished()

    def reset(self):
        """Start a new episode."""
        self._game.new_episode()

    def get_episode_reward(self):
        """Careful! Returns ___non-shaped___ episode reward"""
        return self._game.get_total_reward()
class VizDoomEnv(gym.Env):
    """Gym-style wrapper around a ViZDoom game with an indexed action set.

    Actions are indices into the list of all pressed/released button
    combinations; observations are the raw ``screen_buffer`` of the state.
    """

    def __init__(self, config='my_way_home.cfg', repeat_action=1, render=False):
        """Configure and start the game.

        :param config: path of the ViZDoom ``.cfg`` file to load
        :param repeat_action: number of tics each action is repeated for
        :param render: passed to ``set_window_visible`` before ``init()``
        """
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_mode(Mode.PLAYER)
        self._game.set_screen_format(ScreenFormat.GRAY8)
        self._game.set_screen_resolution(ScreenResolution.RES_640X480)
        self._game.set_window_visible(render)
        self._game.init()

        self._actions = self._get_actions()
        self._repeat_action = repeat_action
        self._is_rendered = False

    def _get_actions(self):
        """Return every True/False assignment of the available buttons."""
        num_actions = self._game.get_available_buttons_size()
        return [
            list(perm)
            for perm in itertools.product([False, True], repeat=num_actions)
        ]

    def _get_observation(self):
        """Return the current screen buffer, or ``None`` without a state."""
        state = self._game.get_state()
        if state is not None:
            return state.screen_buffer
        return None

    def _get_terminal(self):
        """Return whether the current episode has finished."""
        return self._game.is_episode_finished()

    def reset(self):
        """Start a new episode and return its first observation."""
        self._game.new_episode()
        return self._get_observation()

    def step(self, action):
        """Apply the action with index ``action`` for ``repeat_action`` tics.

        :param action: index into the enumerated button combinations
        :return: ``(observation, reward, done, info)``; ``info`` is an empty
            dict (it used to be an empty list, which is not what Gym
            consumers expect)
        """
        action_ = self._actions[action]
        reward = self._game.make_action(action_, self._repeat_action)
        return self._get_observation(), reward, self._get_terminal(), {}

    def render(self, mode='human'):
        """Ask the game to show its window.

        NOTE(review): this is called after ``init()``; confirm that ViZDoom
        honours ``set_window_visible`` on an already running game.
        """
        self._game.set_window_visible(True)

    def close(self):
        """Shut the game down."""
        self._game.close()
class VizDoomEnv(gym.Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.

    Observations are a list holding the last ``sequenceLength`` preprocessed
    frames (oldest first).
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, params):
        '''
        Load the scenario named by ``params`` and start the game.

        ``params`` must provide ``scenarioPath``, ``frameskip``,
        ``inputShape``, ``sequenceLength`` and ``gameVariables``.
        '''
        super(VizDoomEnv, self).__init__()
        self.params = params
        self.game = DoomGame()
        self.game.load_config(params.scenarioPath)
        self._viewer = None
        self.frameskip = params.frameskip
        self.inputShape = params.inputShape
        self.sequenceLength = params.sequenceLength
        self.seqInputShape = (self.inputShape[0] * self.sequenceLength,
                              self.inputShape[1], self.inputShape[2])
        self.gameVariables = params.gameVariables
        self.numGameVariables = len(self.gameVariables)

        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_channels(),
                        self.game.get_screen_height(),
                        self.game.get_screen_width())
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=output_shape,
                                            dtype='uint8')
        self.game.init()

        # Maintain a buffer of last seq len frames.
        self.frameBuffer = [np.zeros(self.inputShape)] * self.sequenceLength

    def close(self):
        '''Close the game and the image viewer, if one was opened.'''
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        '''Seed the game; a ``None`` seed leaves the game untouched.'''
        if seed is not None:
            self.game.set_seed(seed)

    def step(self, action):
        '''
        Apply ``action`` for ``frameskip`` tics.

        Returns ``(frameBuffer, reward, done, info)``. ``info`` holds the
        state's game variables, or ``None`` when no state is available, in
        which case an all-zero frame is pushed into the buffer instead.
        '''
        reward = self.game.make_action(list(action), self.frameskip)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        if state is not None:
            observation = state.screen_buffer
            info = state.game_variables  # Return the chosen game variables in info
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
            info = None
        processedObservation = self._preProcessImage(observation)

        # Slide the window: drop the oldest frame, append the newest.
        del self.frameBuffer[0]
        self.frameBuffer.append(processedObservation)
        return self.frameBuffer, reward, done, info

    # Preprocess image for use in network
    def _preProcessImage(self, image):
        '''Resize a channels-first image to ``inputShape`` when it differs.'''
        if image.shape != self.inputShape:
            # cv2 works on (H, W, C); convert, resize, convert back.
            image = cv2.resize(image.transpose(1, 2, 0),
                               (self.inputShape[2], self.inputShape[1]),
                               interpolation=cv2.INTER_AREA).transpose(
                                   2, 0, 1)
        return image

    def reset(self):
        '''Start a new episode and fill the buffer with its first frame.'''
        self.game.new_episode()
        state = self._preProcessImage(self.game.get_state().screen_buffer)
        self.frameBuffer = [state] * self.sequenceLength
        return self.frameBuffer

    def render(self, mode='human', close=False):
        '''
        Return the current frame (``rgb_array``) or show it (``human``).

        With ``close=True`` only the viewer is closed.
        '''
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return

        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

        if mode == 'rgb_array':
            return img
        # Compare by value: ``is 'human'`` depended on string interning.
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))
def play(self):
    """Play 20 episodes with the Q-based policy and print reward statistics.

    Loads ``./examples/config/deepdoomplayer.cfg``, keeps a sliding stack of
    converted frames in ``self.last_state`` and picks every action with
    ``choose_next_action_only_on_q``.
    """
    # Create DoomGame instance. It will run the game and communicate with you.
    print("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/deepdoomplayer.cfg")
    game.init()
    print("Doom initialized.")
    episodes = 1
    training_steps_per_epoch = 100
    sleep_time = 0.100
    train_episodes_finished = 0
    train_rewards = []
    for epoch in range(episodes):
        train_loss = []
        game.new_episode()
        while (train_episodes_finished < 20):
            # Slow the loop down between actions.
            sleep(sleep_time)
            if game.is_episode_finished():
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                # NOTE(review): this resets ``_last_state`` while the check
                # below reads ``last_state`` - confirm whether these are meant
                # to be the same attribute.
                self._last_state = None
                self.last_action[1] = 1
            # first frame must be handled differently
            if self.last_state is None:
                # last_state starts as the first frame repeated
                # self.state_frames times, stacked on axis 2
                self.last_state = np.stack(tuple(
                    self.convert_image(game.get_state().image_buffer)
                    for _ in range(self.state_frames)),
                                           axis=2)
                continue
            # Repeat the previously chosen action for 7 tics and scale the reward.
            reward = game.make_action(
                DeepDoomPlayer.define_keys_to_action_pressed(
                    self.last_action), 7)
            reward *= 0.01
            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                # No frame available: substitute an all-zero 40x40 image.
                terminal = True
                screen_resized_binary = np.zeros((40, 40))
            imagebufferlast = imagebuffer
            if imagebuffer is not None:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)
            # add dimension
            screen_resized_binary = np.expand_dims(screen_resized_binary,
                                                   axis=2)
            # Drop the oldest frame and append the newest one.
            current_state = np.append(self.last_state[:, :, 1:],
                                      screen_resized_binary,
                                      axis=2)
            self.last_state = current_state
            self.last_action = self.choose_next_action_only_on_q()
    print(train_episodes_finished, "training episodes played.")
    print("Training results:")
    train_rewards = np.array(train_rewards)
    print("mean:", train_rewards.mean(), "std:", train_rewards.std(), "max:",
          train_rewards.max(), "min:", train_rewards.min())
    # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
    game.close()
    self._last_state = None
class ViZDoom(Environment):
    """
    ViZDoom environment (https://github.com/mwydmuch/ViZDoom).

    States are a single flat float32 vector built by :meth:`featurize`;
    actions are the index of the one button to press.
    """

    def __init__(self, config_file):
        """
        Initialize ViZDoom environment.

        Args:
            config_file: .cfg file path, which defines how a world works and look like (maps)
        """
        self.game = DoomGame()

        # load configurations from file
        self.game.load_config(config_file)
        self.game.init()

        self.state_shape = self.featurize(self.game.get_state()).shape
        self.num_actions = len(self.game.get_available_buttons())

    def __str__(self):
        return 'ViZDoom'

    def states(self):
        """Return the state specification (flat float vector)."""
        return dict(type='float', shape=self.state_shape)

    def actions(self):
        """Return the action specification (one integer per step)."""
        return dict(type='int', shape=(), num_values=self.num_actions)

    def close(self):
        """Shut the game down."""
        self.game.close()

    def reset(self):
        """Start a new episode and return its featurized first state."""
        self.game.new_episode()
        return self.featurize(self.game.get_state())

    def seed(self, seed):
        """Seed the game and return the seed.

        The Python binding exposes ``set_seed``; the previous ``setSeed``
        spelling is the C++ name.
        """
        self.game.set_seed(seed)
        return seed

    @staticmethod
    def _flat_or_zeros(buffer, size):
        """Flatten ``buffer`` to float32, or return ``size`` zeros if absent."""
        if buffer is None:
            return np.zeros(size, dtype=np.float32)
        return buffer.reshape(-1).astype(np.float32)

    def featurize(self, state):
        """Concatenate game variables and all buffers into one float32 vector.

        Disabled buffers (depth, labels, automap) are replaced by
        ``H * W`` zeros, with H and W taken from the first two axes of the
        screen buffer.
        """
        H = state.screen_buffer.shape[0]
        W = state.screen_buffer.shape[1]
        _vars = state.game_variables.reshape(-1).astype(np.float32)
        _screen_buf = state.screen_buffer.reshape(-1).astype(np.float32)
        _depth_buf = self._flat_or_zeros(state.depth_buffer, H * W * 1)
        _labels_buf = self._flat_or_zeros(state.labels_buffer, H * W * 1)
        _automap_buf = self._flat_or_zeros(state.automap_buffer, H * W * 1)
        return np.concatenate(
            (_vars, _screen_buf, _depth_buf, _labels_buf, _automap_buf))

    def execute(self, action):
        """Press button ``action`` for one step.

        Returns:
            ``(states, terminal, reward)``. When the game reports no state
            after the action, ``states`` is an all-zero vector of
            ``state_shape`` instead of failing inside :meth:`featurize`.
        """
        one_hot_enc = [0] * self.num_actions
        one_hot_enc[action] = 1
        reward = self.game.make_action(one_hot_enc)
        terminal = self.game.is_episode_finished()
        state = self.game.get_state()
        if state is None:
            states = np.zeros(self.state_shape, dtype=np.float32)
        else:
            states = self.featurize(state)
        return states, terminal, reward
class VizDoomGym(gym.Env):
    """
    Wraps a VizDoom environment

    The class is not constructed directly (``__init__`` raises); the real
    initialisation lives in :meth:`_init`.
    """

    def __init__(self):
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        :param mission_file: name of the mission (.cfg) to run,
        :param scaled_resolution: resolution (height, width) of the video frames
                                  to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)",
                          self.mission_file)

        self.deathmatch = True
        # distance we need the agent to travel per time-step, otherwise we penalise
        self.distance_threshold = 15

        self.prev_properties = None
        self.properties = None

        self.cum_kills = np.array([0])

        # Create an instance of the VizDoom game, initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")

        self.env.set_doom_skill(4)
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        # Copy before editing: ``metadata`` is a class-level dict, so an
        # in-place item assignment would leak into other environments.
        self.metadata = dict(self.metadata, render_modes=['rgb_array'])

        # Allow only one button to be pressed at a given step; the last
        # button is excluded here because sub_step always presses it.
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=1.0,
                                                shape=(self.rows, self.columns,
                                                       3),
                                                dtype=np.float32)

        self._rgb_array = None
        self.steps = 0
        self.global_steps = 0
        self.reset()

    def _process_image(self, img):
        """Resize ``img`` to (rows, columns) and scale it to float32 in [0, 1]."""
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """
        Check and update game variables.

        Shifts ``properties`` into ``prev_properties`` and stores the freshly
        read values as the new ``properties``.
        """
        # read game variables
        new_v = {
            k: self.env.get_game_variable(v)
            for k, v in game_variables.items()
        }
        # Everything except the *_x/_y/_z coordinates must be integral.
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']

        # check game variables
        assert 0 <= health <= 200 or health < 0 and self.env.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """
        Update reward.

        :return: shaped reward computed from the difference between the
                 current and the previous game variables
        """
        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        reward = 0

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']

        # death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            reward += default_reward_values['SUICIDE']

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                reward += default_reward_values['MEDIKIT']
            else:
                reward += default_reward_values['INJURED']

        # found armor (losing armor is not penalised)
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                reward += default_reward_values['ARMOR']

        # found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if d != 0:
            if d > 0:
                reward += default_reward_values['AMMO']
            else:
                reward += default_reward_values['USE_AMMO']

        # distance
        diff_x = self.properties['position_x'] - self.prev_properties[
            'position_x']
        diff_y = self.properties['position_y'] - self.prev_properties[
            'position_y']
        distance = np.sqrt(diff_x**2 + diff_y**2)
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']

        # living
        reward += default_reward_values['LIVING']

        return reward

    def sub_reset(self):
        """Reset environment"""
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        """Reset the episode and return the first processed observation."""
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """Take step

        :param action: index of the single button to press
        :return: ``(observation, reward, done)``
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1
        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())

        _ = self.env.make_action(list(one_hot_action),
                                 self._action_frame_repeat)

        self.update_game_variables()

        # update_reward needs two consecutive property snapshots, so the
        # first steps of an episode yield no shaped reward.
        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        # When done, the last available frame is reused as the observation.
        observation = self._process_image(self._rgb_array)
        return observation, reward, done

    def step(self, action):
        """Gym step: ``(observation, reward, done, info)`` with empty info."""
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """Seed the game; ``None`` leaves it untouched (``0`` is a valid seed)."""
        if seed is not None:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """Render frame

        Only ``'rgb_array'`` is supported; any other mode raises
        ``NotImplementedError``.
        """
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError
class VizDoomEnv(Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    # Order in which _get_game_variables expects the state variables.
    _GAME_VARIABLE_NAMES = (
        'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT', 'HEALTH',
        'ARMOR', 'DEAD', 'ON_GROUND', 'ATTACK_READY', 'ALTATTACK_READY',
        'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO', 'AMMO1', 'AMMO2', 'AMMO3',
        'AMMO4', 'AMMO5', 'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0',
    )

    def __init__(self, cfg_name, repeat=1):
        '''
        Load ``./slm_lab/env/vizdoom/cfgs/<cfg_name>.cfg`` and start the game.

        ``repeat`` is the number of tics each action is held for; it used
        to be accepted but ignored.
        '''
        super(VizDoomEnv, self).__init__()
        self.game = DoomGame()
        self.game.load_config('./slm_lab/env/vizdoom/cfgs/' + cfg_name +
                              '.cfg')
        self._viewer = None
        self.repeat = repeat

        # TODO In future, need to update action to handle (continuous) DELTA buttons using gym's Box space
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_height(),
                        self.game.get_screen_width(),
                        self.game.get_screen_channels())
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=output_shape,
                                            dtype='uint8')
        self.game.init()

    def close(self):
        '''Close the game and the image viewer, if one was opened.'''
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        '''Seed the game; a ``None`` seed leaves the game untouched.'''
        if seed is not None:
            self.game.set_seed(seed)

    def step(self, action):
        '''
        Apply ``action`` for ``repeat`` tics.

        Returns ``(observation, reward, done, info)`` where the observation
        is the screen buffer transposed to (H, W, C), or zeros when the game
        reports no state; ``info`` is always an empty dict.
        '''
        reward = self.game.make_action(list(action), self.repeat)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        # info = self._get_game_variables(state.game_variables)
        info = {}
        if state is not None:
            observation = state.screen_buffer.transpose(1, 2, 0)
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
        return observation, reward, done, info

    def reset(self):
        '''Start a new episode and return its first (H, W, C) frame.'''
        # self.seed(seed)
        self.game.new_episode()
        return self.game.get_state().screen_buffer.transpose(1, 2, 0)

    def render(self, mode='human', close=False):
        '''
        Return the raw screen buffer (``rgb_array``) or show it (``human``).

        With ``close=True`` only the viewer is closed.
        '''
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return

        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode: blank frame laid out channels-first
            # like the screen buffer, so the transpose below stays valid
            # (observation_space.shape itself is (H, W, C)).
            img = np.zeros(shape=self.observation_space.shape,
                           dtype=np.uint8).transpose(2, 0, 1)

        if mode == 'rgb_array':
            return img
        # Compare by value: ``is 'human'`` depended on string interning.
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))

    def _get_game_variables(self, state_variables):
        '''
        Map the positional ``state_variables`` onto named entries.

        Returns an empty dict when ``state_variables`` is ``None``; indexing
        is positional, so a sequence shorter than the name table raises
        ``IndexError`` as before.
        '''
        info = {}
        if state_variables is not None:
            for index, name in enumerate(self._GAME_VARIABLE_NAMES):
                info[name] = state_variables[index]
        return info
def train(conf):
    """Train a double-DQN agent on ``defend_the_center`` for ``conf.episode`` episodes.

    :param conf: configuration object; this function reads ``conf.seed`` and
        ``conf.episode`` and forwards ``conf`` to ``DoubleDQNAgent``
    :return: ``(steps, scores, total_time, kills, ammos)`` - cumulative step
        count, raw score, kill count and ammo recorded at the end of each
        episode, plus the wall-clock duration in seconds
    """
    #to get total time of training
    start_time = time.time()
    #set the seeds for reproducibility
    random.seed(conf.seed)
    np.random.seed(conf.seed)
    tf.set_random_seed(conf.seed)
    # Let TensorFlow grow GPU memory on demand instead of grabbing it all
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)
    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()
    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc
    action_size = game.get_available_buttons_size()
    img_rows , img_cols = 64, 64
    # Convert image into Black and white
    img_channels = 4 # We stack 4 frames
    state_size = (img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, conf)
    agent.model = Networks.dqn(state_size, action_size, agent.learning_rate)
    agent.target_model = Networks.dqn(state_size, action_size, agent.learning_rate)
    x_t = game_state.screen_buffer # 480 x 640
    x_t = preprocessImg(x_t, size=(img_rows, img_cols))
    s_t = np.stack(([x_t]*4), axis=2) # It becomes 64x64x4
    s_t = np.expand_dims(s_t, axis=0) # 1x64x64x4
    is_terminated = game.is_episode_finished()
    # Start training
    epsilon = agent.initial_epsilon
    GAME = 0
    t = 0
    max_life = 0 # Maximum episode life (Proxy for agent performance)
    life = 0
    # Buffer to compute rolling statistics
    life_buffer, ammo_buffer, kills_buffer = [], [], []
    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0
    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])
        # Epsilon Greedy
        action_idx = agent.get_action(s_t)
        a_t[action_idx] = 1
        a_t = a_t.astype(int)
        r_t = game.make_action(a_t.tolist(), agent.frame_per_action)
        game_state = game.get_state() # Observe again after we take the action
        is_terminated = game.is_episode_finished()
        # print(r_t)
        score += r_t
        step += 1
        if (is_terminated):
            if (life > max_life):
                max_life = life
            GAME += 1
            # ``misc`` still holds the variables of the last step before the
            # episode ended.
            life_buffer.append(life)
            ammo_buffer.append(misc[1])
            kills_buffer.append(misc[0])
            kills.append(misc[0])
            ammos.append(misc[1])
            print ("Episode Finish ", misc)
            # print(scores)
            # Restart and re-read the state so the code below sees the first
            # frame of the new episode.
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer
            scores.append(score)
            score = 0
            # NOTE(review): ``step`` is never reset, so ``steps`` holds
            # cumulative counts - confirm this is intended.
            steps.append(step)
            episodes.append(e)
            e += 1
        x_t1 = game_state.screen_buffer
        misc = game_state.game_variables
        x_t1 = preprocessImg(x_t1, size=(img_rows, img_cols))
        x_t1 = np.reshape(x_t1, (1, img_rows, img_cols, 1))
        # Newest frame first, followed by the three most recent older frames.
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)
        r_t = agent.shape_reward(r_t, misc, prev_misc, t)
        if (is_terminated):
            life = 0
        else:
            life += 1
        # Update the cache
        prev_misc = misc
        # save the sample <s, a, r, s'> to the replay memory and decrease epsilon
        agent.replay_memory(s_t, action_idx, r_t, s_t1, is_terminated, t)
        # Do the training
        if t > agent.observe and t % agent.timestep_per_train == 0:
            Q_max, loss = agent.train_replay()
        s_t = s_t1
        t += 1
        # print info
        state = ""
        if t <= agent.observe:
            state = "observe"
        elif t > agent.observe and agent.epsilon > agent.final_epsilon:
            state = "explore"
        else:
            state = "train"
        if (is_terminated):
            # NOTE(review): ``score`` was already reset above, so "REWARD"
            # prints 0 here - confirm.
            print("TIME", t, "/ GAME", GAME, "/ STATE", state, \
                  "/ EPSILON", agent.epsilon, "/ ACTION", action_idx, "/ REWARD", score, \
                  "/ Q_MAX %e" % np.max(Q_max), "/ LIFE", max_life, "/ LOSS", loss)
            # Save Agent's Performance Statistics
            if GAME % agent.stats_window_size == 0 and t > agent.observe:
                print("Update Rolling Statistics")
                agent.mavg_score.append(np.mean(np.array(life_buffer)))
                agent.var_score.append(np.var(np.array(life_buffer)))
                agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
                agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))
                # Reset rolling stats buffer
                life_buffer, ammo_buffer, kills_buffer = [], [], []
    total_time = time.time() - start_time
    return steps, scores, total_time, kills, ammos
# Enables labeling of in game objects labeling. game.set_labels_buffer_enabled(True) # Enables depth buffer. game.set_depth_buffer_enabled(True) # Enables buffer with top down map of he current episode/level . # game.set_automap_buffer_enabled(True) game.init() game.new_episode() game_state = game.get_state() if not game.is_episode_finished(): labels = game_state.labels_buffer # if labels is not None: # plt.imshow(labels) # plt.show() misc = game_state.game_variables # [Health] prev_misc = misc action_size = game.get_available_buttons_size( ) # [Turn Left, Turn Right, Move Forward] measurement_size = n_measures # [Health, Medkit, Poison] timesteps = [1, 2, 4, 8, 16, 32] goal_size = measurement_size * len(timesteps) img_rows, img_cols = 84, 84
def start(self):
    """Run the training loop for ``DeepDoom.episodes`` epochs.

    Each epoch plays ``DeepDoom.training_steps_per_epoch`` steps, stores
    the transitions in ``self.observations``, trains once enough of them
    have been collected and prints the reward statistics of the episodes
    that finished during the epoch. From epoch 21 on a checkpoint is
    saved at the start of every epoch.
    """
    # Create DoomGame instance. It will run the game and communicate with you.
    print ("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/learningtensorflow.cfg")
    game.init()
    print ("Doom initialized.")
    train_rewards = []

    for epoch in range(DeepDoom.episodes):
        print ("\nEpoch", epoch)
        train_episodes_finished = 0

        # start saving after 20 epochs
        if epoch > 20:
            if not os.path.exists(DeepDoom.checkpoint_path):
                os.mkdir(DeepDoom.checkpoint_path)
            self.saver.save(self.session, DeepDoom.checkpoint_path,
                            global_step=epoch)

        train_start = time()

        game.new_episode()

        for _ in tqdm(range(DeepDoom.training_steps_per_epoch)):

            if game.is_episode_finished():
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                self.last_state = None

            # first frame must be handled differently
            if self.last_state is None:
                # the last_state will contain the image data from the last
                # self.state_frames frames
                self.last_state = np.stack(
                    tuple(self.convert_image(game.get_state().image_buffer)
                          for _ in range(self.state_frames)),
                    axis=2)
                continue

            # Repeat the previously chosen action for 7 tics, scale the reward.
            reward = game.make_action(
                DeepDoom.define_keys_to_action_pressed(self.last_action), 7)
            reward *= 0.01

            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                # No frame available: treat as terminal with a blank image.
                terminal = True
                screen_resized_binary = np.zeros((40, 40))
            else:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)

            # add dimension
            screen_resized_binary = np.expand_dims(screen_resized_binary,
                                                   axis=2)

            # Drop the oldest frame and append the newest one.
            current_state = np.append(self.last_state[:, :, 1:],
                                      screen_resized_binary, axis=2)

            self.observations.append((self.last_state, self.last_action,
                                      reward, current_state, terminal))

            if len(self.observations) > self.memory_size:
                self.observations.popleft()

            # only train if done observing
            if len(self.observations) > self.observation_steps:
                self.train()
                self.time += 1

            self.last_state = current_state
            self.last_action = self.choose_next_action()

            # Anneal the exploration probability once training has begun.
            if self.probability_of_random_action > self.final_random_action_prob \
                    and len(self.observations) > self.observation_steps:
                self.probability_of_random_action -= \
                    (self.initial_random_action_prob - self.final_random_action_prob) / self.explore_steps

        print (train_episodes_finished, "training episodes played.")
        print ("Training results:")

        train_rewards = np.array(train_rewards)

        train_end = time()
        train_time = train_end - train_start

        if train_rewards.size:
            print ("mean:", train_rewards.mean(),
                   "std:", train_rewards.std(),
                   "max:", train_rewards.max(),
                   "min:", train_rewards.min(),
                   "epsilon:", self.probability_of_random_action)
        else:
            # max()/min() raise ValueError on an empty array, which happens
            # whenever no episode finished during this epoch.
            print ("no episode finished in this epoch,",
                   "epsilon:", self.probability_of_random_action)
        print ("t:", str(round(train_time, 2)) + "s")
        train_rewards = []

    # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
    game.close()
    self.last_state = None
def train(conf):
    """Run a ``RandomAgent`` on ``defend_the_center`` for ``conf.episode`` episodes.

    :param conf: configuration object; ``conf.episode`` is read here and
        ``conf`` is forwarded to ``RandomAgent``
    :return: ``(steps, scores, total_time, kills, ammos)`` - cumulative step
        count, raw score, kill count and ammo recorded at the end of each
        episode, plus the wall-clock duration in seconds
    """
    # to get total time of training
    start_time = time.time()

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()
    agent = RandomAgent(action_size, conf)
    episode = conf.episode

    t = 0
    scores, steps, kills, ammos = [], [], [], []
    step = 0  # cumulative over all episodes, never reset
    e = 0
    score = 0

    while e < episode:
        # One-hot encode the randomly selected button.
        action_idx = agent.select_action()
        a_t = np.zeros(action_size, dtype=int)
        a_t[action_idx] = 1

        r_t = game.make_action(a_t.tolist(), 4)
        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        score += r_t
        step += 1

        if is_terminated:
            # ``misc`` still holds the variables of the last step before the
            # episode ended.
            kills.append(misc[0])
            ammos.append(misc[1])
            print ("Episode Finish ", misc)

            game.new_episode()
            game_state = game.get_state()

            scores.append(score)
            score = 0
            steps.append(step)
            e += 1

        misc = game_state.game_variables
        # The shaped reward is not used here; the call is kept because the
        # agent may track state inside shape_reward.
        agent.shape_reward(r_t, misc, prev_misc, t)

        # Update the cache
        prev_misc = misc
        t += 1

    total_time = time.time() - start_time

    return steps, scores, total_time, kills, ammos
class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """

    def __init__(self, cfg_path, number_maps, scaled_resolution=(42, 42),
                 action_frame_repeat=4, clip=(-1, 1), seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation to be returned with each step
        :param action_frame_repeat: how many game tics should an action be active
        :param clip: how much the reward returned on each step should be clipped to
        :param seed: seed for random, used to determine the order in which the doom maps are shown.
        :param data_augmentation: bool to determine whether or not to use data augmentation
            (adding randomly colored, randomly sized boxes to observation)
        :raises ValueError: if cfg_path does not exist or does not end with .cfg
        """
        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)
        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        # Compare against None: a plain truthiness test would silently
        # ignore a legitimate seed of 0.
        if seed is not None:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game, initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only discrete actions are supported (no delta actions), so none of
        # the configured buttons may be one of the *_DELTA buttons.
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA,
            Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA,
            Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA,
        ]
        assert len((set(available_actions) - set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(self.env.get_available_buttons_size())

        # scaled_resolution is (height, width); observations are channels-last.
        height, width = scaled_resolution[0], scaled_resolution[1]
        self.observation_space = gym.spaces.Box(0.0, 255.0,
                                                shape=(height, width, 3),
                                                dtype=np.float32)

        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the vizdoom environment observation numpy array into the desired resolution and shape

        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape

        # The stored frame is moved to channels-last before resizing; both
        # resize backends take the target size as (width, height), i.e. the
        # opposite order to numpy indexing.
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0), (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y,
        where N is randomly sampled from 0..5 and x and y are randomly
        sampled from between 0.1 and 0.35 of the image size.

        The image is modified in place and also returned.

        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        # randint's upper bound is exclusive, so at most 5 boxes are drawn.
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            # Scale the relative position/extent to pixel coordinates.
            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            # Each channel gets its own random intensity, giving a random colour.
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.

        :param img: image to resize
        :param shape: target size in the order the backends expect it,
            (columns, rows) - i.e. (width, height)
        :return: resized image
        :raises ValueError: if neither OpenCV nor Pillow is available
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'' Install either '
                             'OpenCV or Pillow to support image processing.')
        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)
        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))

        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in the format (rows, columns, channels)
        """
        # Value comparison: `is not 0` tests object identity, which only
        # works by accident for small CPython ints and warns on Python 3.8+.
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def step(self, action):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.

        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
            defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, height, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action), self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running; on the
        # terminal step the previously stored frame is re-used.
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment,
        saving a frame to disk after every second tic.

        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
            defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, height, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        # The repeat is split into 2-tic chunks so a frame can be written
        # between them.
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            # No state is available once the episode has ended; the last
            # stored frame is written again in that case.
            if env_state:
                self._rgb_array = env_state.screen_buffer
            imageio.imwrite(
                os.path.join(record_path, str(datetime.datetime.now()) + ".png"),
                self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame

        :param mode: only 'rgb_array' is supported
        :return: the most recently stored raw screen buffer (not resized;
            kept in the layout ViZDoom delivered it in)
        :raises NotImplementedError: for any other mode
        """
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment. For example as used by Stable Baselines

        The factory forwards this environment's reward clipping and data
        augmentation settings so that the copies behave like the original.

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat,
                               clip=self.clip,
                               data_augmentation=self.data_augmentation)
class VizdoomEnv(gym.Env):
    """Gym wrapper around a ViZDoom scenario selected from ``CONFIGS``.

    Observations are the raw screen buffer moved to channels-last layout;
    actions are indices into a one-hot button vector.
    """

    def __init__(self, level):
        """Load and start the scenario registered under ``level``.

        :param level: key into ``CONFIGS``; entry 0 is the config file name
            (relative to the ``scenarios`` directory next to this module)
            and entry 1 is the number of discrete actions.
        """
        # init game
        self.game = DoomGame()
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.init()
        self.state = None

        self.action_space = spaces.Discrete(CONFIGS[level][1])
        self.observation_space = spaces.Box(
            0, 255,
            (self.game.get_screen_height(),
             self.game.get_screen_width(),
             self.game.get_screen_channels()),
            dtype=np.uint8)
        self.viewer = None

    def step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        On the terminal step no game state exists, so an all-zero frame is
        returned and ``info["episode"]["r"]`` carries the episode's total
        reward.
        """
        # convert action to vizdoom action space (one hot)
        act = np.zeros(self.action_space.n, dtype=np.uint8)
        act[action] = 1

        reward = self.game.make_action(act.tolist())
        state = self.game.get_state()
        done = self.game.is_episode_finished()

        if done:
            observation = np.zeros(self.observation_space.shape, dtype=np.uint8)
            info = {"episode": {"r": self.game.get_total_reward()}}
        else:
            # Move the channel axis last to match observation_space.
            observation = np.transpose(state.screen_buffer, (1, 2, 0))
            info = {}

        return observation, reward, done, info

    def seed(self, seed):
        """Forward ``seed`` to the ViZDoom engine."""
        self.game.set_seed(seed)

    def close(self):
        """Shut down the ViZDoom engine."""
        self.game.close()

    def reset(self):
        """Start a new episode and return its first frame (channels-last)."""
        self.game.new_episode()
        self.state = self.game.get_state()
        img = self.state.screen_buffer
        return np.transpose(img, (1, 2, 0))

    def render(self, mode='human'):
        """Show the current frame in a ``SimpleImageViewer`` window.

        Does nothing when no game state is available (for example after the
        episode has finished).
        """
        state = self.game.get_state()
        # Explicit guard instead of swallowing AttributeError around the
        # whole body, so genuine viewer errors are no longer hidden.
        if state is None:
            return
        img = np.transpose(state.screen_buffer, [1, 2, 0])

        if self.viewer is None:
            self.viewer = rendering.SimpleImageViewer()
        self.viewer.imshow(img)

    @staticmethod
    def get_keys_to_action():
        """Return the keyboard mapping used for interactive play.

        Keys are tuples of pressed key codes and values are action indices;
        the empty tuple (no key pressed) maps to action 2.
        """
        # you can press only one key at a time!
        keys = {
            (): 2,
            (ord('a'),): 0,
            (ord('d'),): 1,
            (ord('w'),): 3,
            (ord('s'),): 4,
            (ord('q'),): 5,
            (ord('e'),): 6,
        }
        return keys
class DoomScenario:
    """
    DoomScenario class runs instances of Vizdoom according to scenario
    configuration (.cfg) files.

    Scenario Configuration files for this project are located in
    the /src/configs/ folder.

    """

    def __init__(self, config_filename):
        '''
        Method initiates Vizdoom with desired configuration file.

        config_filename is resolved relative to the "configs/" directory.
        A first episode is started immediately.

        '''
        self.config_filename = config_filename
        self.game = DoomGame()
        self.game.load_config("configs/" + config_filename)
        self.game.set_window_visible(False)
        self.game.init()

        self.res = (self.game.get_screen_height(), self.game.get_screen_width())
        # Every on/off combination of the available buttons.
        self.actions = [
            list(a)
            for a in it.product([0, 1], repeat=self.game.get_available_buttons_size())
        ]

        # Progress bar; only set while a verbose run() is in progress.
        self.pbar = None
        self.game.new_episode()

    def play(self, action, tics):
        '''
        Method advances state with desired action for a number of tics.

        '''
        self.game.set_action(action)
        self.game.advance_action(tics, True)
        if self.pbar:
            self.pbar.update(int(tics))

    def get_processed_state(self, depth_radius, depth_contrast):
        '''
        Method processes the Vizdoom RGB and depth buffer into
        a composite one channel image that can be used by the Models.

        depth_radius defines how far the depth buffer sees with 1.0 being
        as far as ViZDoom allows.

        depth_contrast defines how much of the depth buffer is in the final
        processed image as compared to the greyscaled RGB buffer.
        **processed = (1-depth_contrast)* grey_buffer + depth_contrast*depth_buffer

        Returns a 2-D array of shape self.res scaled to [0, 1]. An all-zero
        array is returned when no usable state exists: the episode has
        finished, the depth buffer is missing, or the buffers do not have
        the expected shape.

        '''
        blank = np.zeros(self.res[-2:])

        state = self.game.get_state()
        # Explicit checks replace the former bare `except:`, which produced
        # the blank frame only as a side effect of a swallowed error.
        if self.game.is_episode_finished() or state is None:
            return blank
        if state.depth_buffer is None:
            return blank

        # Screen pixels, channels-first, scaled to [0, 1].
        screen_buffer = np.array(state.screen_buffer).astype('float32') / 255

        try:
            # Grey Scaling
            grey_buffer = np.dot(np.transpose(screen_buffer, (1, 2, 0)),
                                 [0.21, 0.72, 0.07])

            # Depth Radius: everything beyond the radius is clamped to it.
            depth_buffer = np.array(state.depth_buffer).astype('float32') / 255
            depth_buffer[(depth_buffer > depth_radius)] = depth_radius
            # NOTE(review): the original also computed a min-max normalised
            # copy of depth_buffer that was never used; it has been dropped.
            # Confirm the un-normalised buffer is what the blend should use.

            # Depth Contrast: blend grey image with inverted depth, then
            # min-max normalise the result.
            processed_buffer = ((1 - depth_contrast) * grey_buffer) \
                + (depth_contrast * (1 - depth_buffer))
            processed_buffer = (processed_buffer - np.amin(processed_buffer)) \
                / (np.amax(processed_buffer) - np.amin(processed_buffer))
            processed_buffer = np.round(processed_buffer, 6)
            processed_buffer = processed_buffer.reshape(self.res[-2:])
        except (TypeError, ValueError):
            # Buffers with an unexpected layout (e.g. a single-channel
            # screen) fall back to the blank frame, as before.
            return blank
        return processed_buffer

    def run(self, agent, save_replay='', verbose=False, return_data=False):
        '''
        Method runs a instance of DoomScenario.

        The game is restarted, one episode is played with the agent's
        online network (10% of the decisions are sampled from the softmax
        of the Q-values, the rest are greedy) and the agent's frame buffers
        are reset afterwards.

        save_replay: replay file name under "../data/replay_data/", or ''
        to record nothing.
        verbose: show a progress bar and print the final score.
        return_data: return [states, one_hot_actions] instead of the score.

        '''
        if return_data:
            data_S = []
            data_a = []
        if verbose:
            print("\nRunning Simulation:", self.config_filename)
            self.pbar = tqdm(total=self.game.get_episode_timeout())

        # Initiate New Instance
        self.game.close()
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1 ")
        self.game.init()
        if save_replay != '':
            self.game.new_episode("../data/replay_data/" + save_replay)
        else:
            self.game.new_episode()

        is_hierarchical = agent.model.__class__.__name__ == 'HDQNModel'

        # Run Simulation
        while not self.game.is_episode_finished():
            S = agent.get_state_data(self)
            q = agent.model.online_network.predict(S)
            if np.random.random() < 0.1:
                q = np.random.choice(len(q[0]), 1, p=softmax(q[0], 1))[0]
            else:
                q = int(np.argmax(q[0]))
            a = agent.model.predict(self, q)
            if return_data:
                delta = np.zeros((len(self.actions)))
                # np.cast was removed in NumPy 2.0; this is what it did.
                a_ = np.asarray(a).astype(int)
                delta[a_] = 1
                data_S.append(S.reshape(S.shape[1], S.shape[2], S.shape[3]))
                data_a.append(delta)
            if not self.game.is_episode_finished():
                self.play(a, agent.frame_skips + 1)
            if is_hierarchical and not self.game.is_episode_finished():
                # Indices past the primitive actions select a sub-model
                # (skill), which is then followed for several extra steps.
                if q >= len(agent.model.actions):
                    for i in range(agent.model.skill_frame_skip):
                        if self.game.is_episode_finished():
                            break
                        a = agent.model.predict(self, q)
                        self.play(a, agent.frame_skips + 1)

        # Reset Agent and Return Score
        agent.frames = None
        if is_hierarchical:
            agent.model.sub_model_frames = None
        score = self.game.get_total_reward()
        if verbose:
            self.pbar.close()
            # Detach the closed bar so later play() calls do not update it.
            self.pbar = None
            print("Total Score:", score)
        if return_data:
            data_S = np.array(data_S)
            data_a = np.array(data_a)
            return [data_S, data_a]
        return score

    def replay(self, filename, verbose=False, doom_like=False):
        '''
        Method runs a replay of the simulations at 800 x 600 resolution.

        filename is looked up under "../data/replay_data/". With doom_like
        the full HUD, weapon and particles are rendered. The game is left
        closed when the replay ends.

        '''
        print("\nRunning Replay:", filename)

        # Initiate Replay
        self.game.close()
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.add_game_args("+vid_forcesurface 1")
        if doom_like:
            self.game.set_render_hud(True)
            self.game.set_render_minimal_hud(False)
            self.game.set_render_crosshair(False)
            self.game.set_render_weapon(True)
            self.game.set_render_particles(True)
        self.game.init()
        self.game.replay_episode("../data/replay_data/" + filename)

        # Run Replay
        while not self.game.is_episode_finished():
            if verbose:
                print("Reward:", self.game.get_last_reward())
            self.game.advance_action()

        # Print Score
        score = self.game.get_total_reward()
        print("Total Score:", score)
        self.game.close()

    def apprentice_run(self, test=False):
        '''
        Method runs an apprentice data gathering.

        The game is restarted in spectator mode with a visible 800 x 600
        window and advanced until the episode ends, then closed.
        The test argument is currently unused.

        '''
        # Initiate New Instance
        self.game.close()
        self.game.set_mode(Mode.SPECTATOR)
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.set_ticrate(30)
        self.game.init()
        self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            self.game.advance_action()
        self.game.close()
class ViZDoom(Environment):
    """
    [ViZDoom](https://github.com/mwydmuch/ViZDoom) environment adapter (specification key:
    `vizdoom`).

    Args:
        level (string): ViZDoom configuration file
            (<span style="color:#C00000"><b>required</b></span>).
        include_variables (bool): Whether to include game variables to state
            (<span style="color:#00C000"><b>default</b></span>: false).
        factored_action (bool): Whether to use factored action representation
            (<span style="color:#00C000"><b>default</b></span>: false).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 12).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(self, level, visualize=False, include_variables=False,
                 factored_action=False, frame_skip=12, seed=None):
        # Imported lazily so the module can be loaded without ViZDoom installed.
        from vizdoom import DoomGame, Mode, ScreenFormat, ScreenResolution

        self.config_file = level
        self.include_variables = include_variables
        self.factored_action = factored_action
        self.visualize = visualize
        self.frame_skip = frame_skip

        self.environment = DoomGame()
        self.environment.load_config(self.config_file)
        if self.visualize:
            self.environment.set_window_visible(True)
            self.environment.set_mode(Mode.ASYNC_PLAYER)
        else:
            self.environment.set_window_visible(False)
            self.environment.set_mode(Mode.PLAYER)
        # e.g. CRCGCB, RGB24, GRAY8
        self.environment.set_screen_format(ScreenFormat.RGB24)
        # e.g. RES_320X240, RES_640X480, RES_1920X1080
        self.environment.set_screen_resolution(ScreenResolution.RES_640X480)
        self.environment.set_depth_buffer_enabled(False)
        self.environment.set_labels_buffer_enabled(False)
        self.environment.set_automap_buffer_enabled(False)
        if seed is not None:
            # The Python binding is snake_case; `setSeed` is the C++ name.
            self.environment.set_seed(seed)
        self.environment.init()

        # Ask the engine for the buffer layout instead of hard-coding it:
        # the RGB24 screen buffer is (height, width, channels).
        self.state_shape = (
            self.environment.get_screen_height(),
            self.environment.get_screen_width(),
            self.environment.get_screen_channels(),
        )
        self.num_variables = self.environment.get_available_game_variables_size()
        self.num_buttons = self.environment.get_available_buttons_size()
        # Stored under a name that does not shadow the actions() method;
        # as an attribute called `actions` it made actions() uncallable.
        self.available_actions = [
            tuple(a) for a in itertools.product([0, 1], repeat=self.num_buttons)
        ]

    def __str__(self):
        return super().__str__() + '({})'.format(self.config_file)

    def states(self):
        """Return the state specification (screen, optionally plus variables)."""
        if self.include_variables:
            return OrderedDict(
                screen=dict(type='float', shape=self.state_shape),
                variables=dict(type='float', shape=self.num_variables)
            )
        else:
            return dict(type='float', shape=self.state_shape)

    def actions(self):
        """Return the action specification.

        Factored: one boolean per button. Otherwise: a single integer
        indexing every on/off combination of the buttons.
        """
        if self.factored_action:
            return dict(type='bool', shape=self.num_buttons)
        else:
            return dict(type='int', shape=(), num_values=len(self.available_actions))

    def close(self):
        """Shut down the ViZDoom engine and drop the reference to it."""
        self.environment.close()
        self.environment = None

    def get_states(self):
        """Return the current observation, with the screen scaled to [0, 1].

        Once the episode has finished ViZDoom provides no state; a
        zero-filled observation of the declared shape is returned then.
        """
        state = self.environment.get_state()
        if state is None:
            screen = np.zeros(self.state_shape, dtype=np.float32)
            variables = np.zeros(self.num_variables, dtype=np.float32)
        else:
            screen = state.screen_buffer.astype(dtype=np.float32) / 255.0
            variables = state.game_variables
        if self.include_variables:
            return OrderedDict(screen=screen, variables=variables)
        else:
            return screen

    def reset(self):
        """Start a new episode and return its first observation."""
        self.environment.new_episode()
        return self.get_states()

    def execute(self, actions):
        """Apply ``actions`` for ``frame_skip`` tics.

        Returns:
            Tuple ``(states, terminal, reward)``.
        """
        if self.factored_action:
            action = np.where(actions, 1.0, 0.0)
        else:
            action = self.available_actions[actions]
        if self.visualize:
            # Advance tic by tic so every frame is drawn in the window.
            self.environment.set_action(action)
            reward = 0.0
            for _ in range(self.frame_skip):
                self.environment.advance_action()
                reward += self.environment.get_last_reward()
        else:
            reward = self.environment.make_action(action, self.frame_skip)
        terminal = self.environment.is_episode_finished()
        states = self.get_states()
        return states, terminal, reward
# Number of medkit pickup as measurement medkit = 0 # Number of poison pickup as measurement poison = 0 # Initial normalized measurements m_t = np.array([misc[0]/30.0, medkit/10.0, poison]) # Goal goal = np.array([1.0, 1.0, -1.0] * len(timesteps)) # Goal for Inference (Can change during test-time) inference_goal = goal is_terminated = game.is_episode_finished() # Start training epsilon = agent.initial_epsilon GAME = 0 t = 0 max_life = 0 # Maximum episode life (Proxy for agent performance) life = 0 # Buffer to compute rolling statistics life_buffer = [] while not game.is_episode_finished(): loss = 0 r_t = 0
def play(self):
    """Play 20 Doom episodes with the current policy and print reward statistics.

    Actions come from ``self.choose_next_action_only_on_q()`` and are held
    for 7 tics each. ``self.last_state`` is maintained as a sliding stack
    of the most recent ``self.state_frames`` converted frames. The game is
    closed when the method returns or raises.
    """
    # Create DoomGame instance. It will run the game and communicate with you.
    print ("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/deepdoomplayer.cfg")
    game.init()
    print ("Doom initialized.")

    episodes_to_play = 20
    sleep_time = 0.100
    train_episodes_finished = 0
    train_rewards = []

    try:
        game.new_episode()
        while train_episodes_finished < episodes_to_play:
            sleep(sleep_time)

            if game.is_episode_finished():
                train_rewards.append(game.get_total_reward())
                game.new_episode()
                train_episodes_finished += 1
                # NOTE(review): this clears `_last_state`, while the rest of
                # the method reads and writes `last_state`. Looks like a
                # naming slip; confirm which attribute the class uses.
                self._last_state = None
                self.last_action[1] = 1

            # first frame must be handled differently
            if self.last_state is None:
                # last_state will contain the image data from the last
                # self.state_frames frames; it is seeded with copies of the
                # first frame.
                self.last_state = np.stack(
                    tuple(self.convert_image(game.get_state().image_buffer)
                          for _ in range(self.state_frames)),
                    axis=2)
                continue

            # The reward is not needed while only playing, so it is ignored.
            game.make_action(
                DeepDoomPlayer.define_keys_to_action_pressed(self.last_action), 7)

            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                # No frame available: feed a blank one.
                screen_resized_binary = np.zeros((40,40))
            else:
                screen_resized_binary = self.convert_image(imagebuffer)

            # add dimension
            screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)
            # Slide the window: drop the oldest frame, append the newest.
            self.last_state = np.append(self.last_state[:, :, 1:],
                                        screen_resized_binary, axis=2)
            self.last_action = self.choose_next_action_only_on_q()

        print (train_episodes_finished, "training episodes played.")
        print ("Training results:")
        train_rewards = np.array(train_rewards)
        print ("mean:", train_rewards.mean(), "std:", train_rewards.std(),
               "max:", train_rewards.max(), "min:", train_rewards.min())
    finally:
        # It would be done automatically at exit anyway, but closing here
        # releases the engine even when the loop raises.
        game.close()

    self._last_state = None
# Creates all possible actions depending on how many buttons there are. actions_num = game.get_available_buttons_size() actions = [] for perm in it.product([False, True], repeat=actions_num): actions.append(list(perm)) episodes = 10 sleep_time = 0.028 for i in range(episodes): print("Episode #" + str(i + 1)) # Not needed for the first episdoe but the loop is nicer. game.new_episode() while not game.is_episode_finished(): # Gets the state and possibly to something with it s = game.get_state() img = s.image_buffer misc = s.game_variables # Makes a random action and save the reward. r = game.make_action(choice(actions)) # Makes a "prolonged" action and skip frames: # skiprate = 3 # r = game.make_action(choice(actions), skiprate) # The same could be achieved with: # game.set_action(choice(actions))
game.new_episode() r = -0.01 #base s = None while True: msg = socket.recv() if msg == "state": s = game.get_state() img = s.image_buffer img = scipy.misc.imresize(img, (84, 84, 3)) scipy.misc.imsave('../games/current' + str(port) + '.png', img) socket.send( get_state('../games/current' + str(port) + '.png', r, game.is_episode_finished())) elif msg == "step": socket.send("action") a_indx = socket.recv() a_indx = int(a_indx) - 1 if a_indx == 1 or a_indx == 2: for ii in range(5): r = game.make_action(actions[a_indx]) else: r = game.make_action(actions[a_indx]) # for our toy world, ammo is reward r = s.game_variables[0] - 50 #50 is baseline if r == 0: r = -0.01