def test_environment(episodes=10, config='basic.cfg', scenario='basic.wad',
                     action_sleep=0.02, episode_sleep=2):
    """
    Smoke-test a ViZDoom environment by playing random actions.

    Loads the basic scenario, runs `episodes` episodes choosing a random
    action (shoot / move left / move right) at every step, and prints the
    per-step and total rewards.

    Args:
        episodes (int): number of episodes to play (default 10).
        config (str): path to the ViZDoom config file (default 'basic.cfg').
        scenario (str): path to the scenario WAD file (default 'basic.wad').
        action_sleep (float): pause between actions, for smooth visualization.
        episode_sleep (float): pause between episodes.

    Returns:
        None. Side effects only (game window, console output).
    """
    # Config taken from:
    # https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Deep%20Q%20Learning/Doom/basic.cfg
    game = DoomGame()
    game.load_config(config)
    game.set_doom_scenario_path(scenario)
    game.init()

    # One-hot button encodings: [MOVE_LEFT, MOVE_RIGHT, ATTACK]
    # (order must match the buttons declared in the config file).
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    for _ in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            # state.screen_buffer / state.game_variables are available here
            # if frame inspection is needed; not used by this smoke test.
            action = random.choice(actions)
            print('Action', action)
            reward = game.make_action(action)
            print('Reward', reward)
            time.sleep(action_sleep)
        print('Result', game.get_total_reward())
        time.sleep(episode_sleep)
    game.close()
class Experiment(object):
    """
    Used to perform an experiment combined with an Agent.

    Wraps a ViZDoom ``DoomGame``: configures the scenario and rendering,
    starts/resets episodes, tracks per-episode running statistics in
    ``self.run_stats`` (archived per map in ``self.stats``), and computes
    rewards — either the engine reward alone or augmented by a custom
    ``reward_builder`` (used for reward shaping, e.g. LSTM & Curiosity A3C).
    """

    def __init__(self, scenario, action_builder, reward_builder, logger,
                 living_reward=0, custom_reward=False,
                 score_variable='FRAGCOUNT', game_features=None,
                 freedoom=True, screen_resolution='RES_400X225',
                 screen_format='CRCGCB', use_screen_buffer=True,
                 use_depth_buffer=False, use_labels_buffer=True,
                 mode='PLAYER', player_rank=0, players_per_game=1,
                 render_hud=False, render_minimal_hud=False,
                 render_crosshair=True, render_weapon=True,
                 render_decals=False, render_particles=False,
                 render_effects_sprites=False, respawn_protect=True,
                 spawn_farthest=True, name='Hubert_Bonnisseur_de_la_Bate',
                 visible=False, n_bots=0, use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.

        game_features: list of extra game features to extract (parsed by
            `parse_game_features`); defaults to no features.
        freedoom: use the FreeDoom IWAD instead of the commercial Doom2 one.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
        5 - light brown, 6 - light red, 7 - light blue
        """
        # BUGFIX: `game_features` previously used a mutable default ([]),
        # which is shared across all instances; use a None sentinel instead.
        if game_features is None:
            game_features = []

        # game resources
        # BUGFIX: `freedoom` was accepted but ignored (the filename was
        # hard-coded); honor it like the sibling `Game` class does.
        # Default behavior (freedoom=True) is unchanged.
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario = scenario
        self.scenario_path = os.path.join(PATH,
                                          'scenarios/{}.wad'.format(scenario))
        self.game_path = os.path.join(PATH, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        # a minimal HUD only makes sense when the HUD is rendered at all
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.game_features = parse_game_features(game_features, logger)
        self.use_labels_buffer = use_labels_buffer
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # window visibility
        self.visible = visible

        # actor reward — used for reward shaping (LSTM & Curiosity A3C)
        self.reward_builder = reward_builder
        self.living_reward = living_reward
        self.custom_reward = custom_reward

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill (ie difficulty of the game)
        self.doom_skill = doom_skill

        # bot name
        self.name = name

        # action builder
        self.action_builder = action_builder

        # save game statistics for each episode
        # (used for model comparison and reward shaping)
        self.stats = {}

        # use logging for DEBUG purpose
        self.logger = logger

    #==========================================================================
    # Game start
    #==========================================================================
    def start(self, map_id, episode_time=None, log_events=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time.

        map_id: 1-based index of the map to load ('mapNN').
        episode_time: episode timeout in engine tics (None = no timeout).
        log_events: currently unused (kept for API compatibility).
        """
        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map('map{:02d}'.format(map_id))

        # time limit
        if episode_time is not None:
            self.game.set_episode_timeout(episode_time)

        # Save statistics for this map
        self.stats[self.map_id] = []

        # log events that happen during the game (useful for testing)
        # self.log_events = log_events

        # game parameters
        args = []

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        # args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # agent name
        args.append('+name %s' % self.name)

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # define available buttons
        self.action_builder.set_buttons(self.game)

        # doom skill (https://zdoom.org/wiki/GameSkill)
        self.game.set_doom_skill(self.doom_skill + 1)

        # define basic rewards
        self.game.set_living_reward(self.living_reward)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()
        self.logger.info('start_game')

    #==========================================================================
    # Game statistics
    #==========================================================================
    def update_game_properties(self):
        """
        Update game properties.

        Reads every variable declared in the module-level GAME_FEATURES
        mapping, normalizes values to int/float, and rotates
        (prev_properties, properties).
        """
        # read game variables
        new_v = {
            k: self.game.get_game_variable(v)
            for k, v in GAME_FEATURES.items()
        }
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        # update game properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_game_statistics(self):
        """
        Calculate game statistics and store them in the running stats dict.

        Returns the list of reward event labels (e.g. 'kill_count', 'dead',
        'medikit', ...) that occurred since the previous step; the
        reward_builder converts this list into a scalar shaping reward.
        """
        stats = self.run_stats
        # init r if custom rewards
        r = []

        # kill
        d = self.properties['kill_count'] - self.prev_properties['kill_count']
        if d > 0:
            # one event per kill this step
            r.extend(d * ['kill_count'])
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            r.append('dead')
            stats['deaths'] += 1

        # suicide (frag count decreases when the player kills itself)
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            r.append('suicide')
            stats['suicides'] += 1

        # found health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                r.append('medikit')
                stats['medikit'] += 1
            stats['health'] = self.properties['health']

        # health lost (damage_count only increases)
        d = self.properties['damage_count'] - self.prev_properties[
            'damage_count']
        if d > 0:
            r.append('health_lost')

        # found armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                r.append('armor')
                stats['armor'] += 1

        # found weapon
        if self.prev_properties['sel_weapon'] != self.properties['sel_weapon']:
            r.append('weapon')
            stats['found_weapon'] += 1

        # found / lost ammo — only meaningful when the selected weapon
        # did not change (otherwise sel_ammo refers to a different weapon)
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if self.prev_properties['sel_weapon'] == self.properties['sel_weapon']:
            if d != 0:
                if d > 0:
                    r.append('ammo')
                    stats['ammo'] += 1
                else:
                    r.append('use_ammo')

        # auxiliary stats not used for rewards
        stats['frag_count'] = self.properties['frag_count']
        return r

    def calculate_final_stats(self):
        """
        Calculate the final stats from the running stats
        (kill/death ratio, guarded against division by zero).
        """
        self.run_stats['k/d'] = self.run_stats['kills'] * 1.0 / max(
            1, self.run_stats['deaths'])

    #==========================================================================
    # Game handling
    #==========================================================================
    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def reset(self):
        """
        Reset the game if necessary. This can be because:
            - we reach the end of an episode (we restart the game)
            - because the agent is dead (we make it respawn)
        Also archives the current run_stats for the active map.
        """
        self.stats[self.map_id].append(self.run_stats)
        # if the player is dead
        if self.is_player_dead():
            # respawn it (deathmatch mode)
            if self.episode_time is None:
                self.respawn_player()
            # or reset the episode (episode ends when the agent dies)
            else:
                self.new_episode()
        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()
        # deal with a ViZDoom issue
        # while self.is_player_dead():
        #     logger.warning('Player %i is still dead after respawn.' %
        #                    self.params.player_rank)
        #     self.respawn_player()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        # self.log('Respawn player')
        self.initialize_game()

    def new_episode(self):
        """
        Start a new episode: reset running stats, restart the engine
        episode, and re-read the initial game properties.
        """
        # init new stats for the episode
        self.run_stats = {k: 0 for k in STAT_KEYS}
        # init new game
        self.game.new_episode()
        # init episode properties
        self.initialize_game()
        # self.log('New episode')

    def initialize_game(self):
        """
        Reset game properties after a spawn/respawn so that values from
        the previous life are not carried over.
        """
        new_v = {
            k: self.game.get_game_variable(v)
            for k, v in GAME_FEATURES.items()
        }
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        # NOTE: a stray no-op expression statement (`self.stats`) was
        # removed here; it had no effect.
        self.prev_properties = None
        self.properties = new_v

    def close(self):
        """
        Close the current experiment.
        """
        self.game.close()

    def observe_state(self, variable_names, feature_names):
        """
        Observe the current state of the game.

        Returns (screen, variables, game_features) as produced by
        `process_game_info`.
        """
        # read game state
        screen, variables, game_features = process_game_info(
            self.game, variable_names, feature_names)
        # last_states.append(GameState(screen, variables, game_features))
        # return the screen and the game features
        return screen, variables, game_features

    def make_action(self, action, variable_names, feature_names,
                    frame_skip=1, sleep=None):
        """
        Process action and give the next state according to the game motor.

        Inputs :
            action : agent action index (converted by the action_builder)
            frame_skip : nb of frames during which the same action is performed
            sleep : pause game for sleep seconds to smooth visualization
        Output :
            reward defined in the game motor, plus the custom shaping
            reward when enabled
            screen | variables | game_features of the next state
            (all None if the state is final)
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # when visible, repeat the action frame by frame so every frame is
        # rendered (the engine-side frame_skip would drop them)
        if self.visible:
            r = 0
            for _ in range(frame_skip):
                r += self.game.make_action(action)
                # death or episode finished
                if self.is_player_dead() or self.is_episode_finished():
                    break
                # sleep for smooth visualization
                if sleep is not None:
                    time.sleep(sleep)
        else:
            r = self.game.make_action(action, frame_skip)

        # observe resulting state
        if not self.is_final():
            screen, variables, game_features = self.observe_state(
                variable_names, feature_names)
        else:
            screen = None
            variables = None
            game_features = None

        # update game statistics and return custom rewards
        self.update_game_properties()
        list_r = self.update_game_statistics()
        r_bis = 0
        if self.custom_reward and self.reward_builder:
            r_bis = self.reward_builder.get_reward(list_r)

        return r + r_bis, screen, variables, game_features
class Game(object):
    """
    Full deathmatch game wrapper around a ViZDoom ``DoomGame``.

    Compared to `Experiment`, this class additionally handles bots,
    respawn protection, weapon auto-selection, optional manual control
    when the agent gets stuck, and richer per-map statistics.
    """

    def __init__(self,
                 scenario,
                 action_builder,
                 score_variable='FRAGCOUNT',
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 labels_mapping='',
                 game_features='',
                 mode='ASYNC_PLAYER',
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 freelook=False,
                 name='LUBAN',
                 color=0,
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.
        score_variable: indicates in which game variable the user score is
        stored. by default it's in FRAGCOUNT, but the score in ACS against
        built-in AI bots can be stored in USER1, USER2, etc.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
        5 - light brown, 6 - light red, 7 - light blue
        """
        # game resources
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario_path = os.path.join(RESOURCES_DIR, '%s.wad' % scenario)
        self.game_path = os.path.join(RESOURCES_DIR, game_filename)
        print(self.scenario_path)
        print(self.game_path)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert len(name.strip()) > 0 and color in range(8)
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4

        # action builder
        self.action_builder = action_builder

        # add the score variable to the game variables list
        # NOTE(review): this mutates the module-level `game_variables` list,
        # so constructing several Game instances appends duplicate entries —
        # apparently assumes one Game per process; verify against callers.
        self.score_variable = score_variable
        game_variables.append(('score', getattr(GameVariable, score_variable)))

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.labels_mapping = parse_labels_mapping(labels_mapping)
        self.game_features = parse_game_features(game_features)
        self.use_labels_buffer = self.labels_mapping is not None
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # respawn invincibility / distance
        self.respawn_protect = respawn_protect
        self.spawn_farthest = spawn_farthest

        # freelook / agent name / agent color
        self.freelook = freelook
        self.name = name.strip()
        self.color = color

        # window visibility
        self.visible = visible

        # game statistics (per-map dicts keyed by these names)
        self.stat_keys = [
            'distance', 'kills', 'deaths', 'suicides', 'frags', 'k/d',
            'medikits', 'armors', 'pistol', 'shotgun', 'chaingun',
            'rocketlauncher', 'plasmarifle', 'bfg9000', 'bullets', 'shells',
            'rockets', 'cells'
        ]
        self.statistics = {}

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill
        self.doom_skill = doom_skill

        # manual control — counters of consecutive steps without
        # moving forward / without turning (see make_action)
        self.count_non_forward_actions = 0
        self.count_non_turn_actions = 0

    def update_game_variables(self):
        """
        Check and update game variables.

        Reads every variable in the module-level `game_variables` list,
        sanity-checks the values, then rotates (prev_properties, properties).
        """
        # read game variables
        new_v = {k: self.game.get_game_variable(v) for k, v in game_variables}
        # only position coordinates (*_x / *_y / *_z) may be non-integral
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']
        sel_weapon = new_v['sel_weapon']
        sel_ammo = new_v['sel_ammo']
        bullets = new_v['bullets']
        shells = new_v['shells']
        rockets = new_v['rockets']
        cells = new_v['cells']
        fist = new_v['fist']
        pistol = new_v['pistol']
        shotgun = new_v['shotgun']
        chaingun = new_v['chaingun']
        rocketlauncher = new_v['rocketlauncher']
        plasmarifle = new_v['plasmarifle']
        bfg9000 = new_v['bfg9000']

        # check game variables
        # -1 can be reported transiently by the engine; normalize to a
        # sane default (fist / no ammo)
        if sel_weapon == -1:
            new_v['sel_weapon'] = 1
            sel_weapon = 1
        if sel_ammo == -1:
            new_v['sel_ammo'] = 0
            sel_ammo = 0
        assert sel_weapon in range(1, 8), sel_weapon
        assert sel_ammo >= 0, sel_ammo
        assert all(x in [0, 1] for x in [
            fist, pistol, shotgun, chaingun, rocketlauncher, plasmarifle,
            bfg9000
        ])
        # health can go below 0 on death
        assert 0 <= health <= 200 or health < 0 and self.game.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        assert 0 <= bullets <= 200 and 0 <= shells <= 50
        assert 0 <= rockets <= 50 and 0 <= cells <= 300

        # consistency: selected ammo must match the selected weapon's ammo
        # fist
        if sel_weapon == 1:
            assert sel_ammo == 0
        # pistol
        elif sel_weapon == 2:
            assert pistol and sel_ammo == bullets
        # shotgun
        elif sel_weapon == 3:
            assert shotgun and sel_ammo == shells
        # chaingun
        elif sel_weapon == 4:
            assert chaingun and sel_ammo == bullets
        # rocket launcher
        elif sel_weapon == 5:
            assert rocketlauncher and sel_ammo == rockets
        # plasma rifle
        elif sel_weapon == 6:
            assert plasmarifle and sel_ammo == cells
        # BFG9000
        elif sel_weapon == 7:
            assert bfg9000 and sel_ammo == cells

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_statistics(self, action):
        """
        Update statistics of the current game based on the previous and
        the current properties for evaluating the agent performance.

        action: ViZDoom-format action list (indexed via self.mapping).
        """
        stats = self.statistics[self.map_id]

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        # distance — only counted when moving straight forward
        moving_forward = action[self.mapping['MOVE_FORWARD']]
        turn_left = action[self.mapping['TURN_LEFT']]
        turn_right = action[self.mapping['TURN_RIGHT']]
        if moving_forward and not (turn_left or turn_right):
            diff_x = self.properties['position_x'] - self.prev_properties[
                'position_x']
            diff_y = self.properties['position_y'] - self.prev_properties[
                'position_y']
            distance = math.sqrt(diff_x**2 + diff_y**2)
            stats['distance'] += distance

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            stats['deaths'] += 1

        # suicide (frag count decreases)
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            stats['suicides'] += 1

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                stats['medikits'] += 1

        # found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                stats['armors'] += 1

        # found weapon (any change in possession flag counts as a pickup)
        for i, weapon in enumerate([
                'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
                'plasmarifle', 'bfg9000'
        ]):
            if self.prev_properties[weapon] == self.properties[weapon]:
                continue
            stats[weapon] += 1

        # found / lost ammo
        for ammo in ['bullets', 'shells', 'rockets', 'cells']:
            d = self.properties[ammo] - self.prev_properties[ammo]
            if d != 0:
                if d > 0:
                    stats[ammo] += 1

    def start(self, map_id, episode_time=None, manual_control=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time.

        map_id: 1-based map index.
        episode_time: timeout in seconds (converted to 35 tics/second).
        manual_control: enable the stuck-agent override in make_action.
        """
        assert type(manual_control) is bool
        self.manual_control = manual_control

        # Save statistics for this map
        self.statistics[map_id] = {k: 0 for k in self.stat_keys}

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map("map%02i" % map_id)

        # time limit (Doom runs at 35 tics per second)
        if episode_time is not None:
            self.game.set_episode_timeout(int(35 * episode_time))

        # game parameters
        args = []

        # host / server
        args.append('-host 1')

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer
                                            or self.use_game_features)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # respawn invincibility / distance
        # players will be invulnerable for two second after spawning
        # players will be spawned as far as possible from any other players
        args.append('+sv_respawnprotect %i' % self.respawn_protect)
        args.append('+sv_spawnfarthest %i' % self.spawn_farthest)

        # freelook / agent name / agent color
        args.append('+freelook %i' % (1 if self.freelook else 0))
        args.append('+name %s' % self.name)
        args.append('+colorset %i' % self.color)

        # enable the cheat system (so that we can still
        # send commands to the game in self-play mode)
        args.append('+sv_cheats 1')

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # available buttons
        self.mapping = add_buttons(self.game,
                                   self.action_builder.available_buttons)

        # doom skill
        self.game.set_doom_skill(self.doom_skill + 1)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()

    def update_bots(self):
        """
        Add built-in AI bots.
        There are two types of AI: built-in AI and ScriptedMarines.
        """
        # only the host takes care of the bots
        if self.use_scripted_marines:
            command = "pukename set_value always 2 %i" % self.n_bots
            self.game.send_game_command(command)
        else:
            self.game.send_game_command("removebots")
            for _ in range(self.n_bots):
                self.game.send_game_command("addbot")

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def new_episode(self):
        """
        Start a new episode.
        """
        assert self.is_episode_finished() or self.is_player_dead()
        self.game.new_episode()
        self.initialize_game()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.initialize_game()

    def initialize_game(self):
        """
        Initialize the game after the player spawns / respawns.
        Be sure that properties from the previous life are not
        considered in this one.
        """
        # generate buffers
        game_state = self.game.get_state()
        self._screen_buffer = game_state.screen_buffer
        self._depth_buffer = game_state.depth_buffer
        self._labels_buffer = game_state.labels_buffer
        self._labels = game_state.labels

        # actor properties
        self.prev_properties = None
        self.properties = None

        # advance a few steps to avoid bugs due
        # to initial weapon changes in ACS
        self.game.advance_action(SKIP_INITIAL_ACTIONS)
        self.update_game_variables()

        # if there are bots in the game, and if this is a new game
        self.update_bots()

    def randomize_textures(self, randomize):
        """
        Randomize the textures of the map (via an ACS script command).
        """
        assert type(randomize) is bool
        randomize = 1 if randomize else 0
        self.game.send_game_command("pukename set_value always 4 %i" %
                                    randomize)

    def init_bots_health(self, health):
        """
        Initial bots health (only configurable with ScriptedMarines).
        """
        assert self.use_scripted_marines or health == 100
        assert 0 < health <= 100
        self.game.send_game_command("pukename set_value always 5 %i" % health)

    def make_action(self, action, frame_skip=1, sleep=None):
        """
        Make an action.
        If `sleep` is given, the network will wait
        `sleep` seconds between each action.

        Also auto-selects the agent's favorite available weapon and, when
        manual control is enabled, overrides the policy with a scripted
        turn/forward action if the agent has been stuck too long.
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # select agent favorite weapon (first preferred weapon that is
        # owned and has enough ammo; BFG needs 40 cells per shot)
        for weapon_name, weapon_ammo, weapon_id in WEAPONS_PREFERENCES:
            min_ammo = 40 if weapon_name == 'bfg9000' else 1
            if self.properties[weapon_name] > 0 and self.properties[
                    weapon_ammo] >= min_ammo:
                if self.properties['sel_weapon'] != weapon_id:
                    # pad the action with the SELECT_WEAPON button set
                    switch_action = (
                        [False] * self.mapping['SELECT_WEAPON%i' % weapon_id]
                    ) + [True]
                    action = action + switch_action[len(action):]
                break

        # track how long the agent has gone without moving / turning
        if action[self.mapping['MOVE_FORWARD']]:
            self.count_non_forward_actions = 0
        else:
            self.count_non_forward_actions += 1
        if action[self.mapping['TURN_LEFT']] or action[
                self.mapping['TURN_RIGHT']]:
            self.count_non_turn_actions = 0
        else:
            self.count_non_turn_actions += 1

        # stuck-agent override: force a turn (and possibly forward move)
        if self.manual_control and (self.count_non_forward_actions >= 30
                                    or self.count_non_turn_actions >= 60):
            manual_action = [False] * len(action)
            manual_action[self.mapping['TURN_RIGHT']] = True
            manual_action[self.mapping['SPEED']] = True
            if self.count_non_forward_actions >= 30:
                manual_action[self.mapping['MOVE_FORWARD']] = True
            manual_repeat = 40
            self.count_non_forward_actions = 0
            self.count_non_turn_actions = 0
        else:
            manual_action = None

        # if we are visualizing the experiment, show all the frames one by one
        if self.visible:
            if manual_action is not None:
                for _ in range(manual_repeat):
                    self.game.make_action(manual_action)
            else:
                for _ in range(frame_skip):
                    self.game.make_action(action)
                    # death or episode finished
                    if self.is_player_dead() or self.is_episode_finished():
                        break
                    # sleep for smooth visualization
                    if sleep is not None:
                        time.sleep(sleep)
        else:
            if manual_action is not None:
                self.game.make_action(manual_action, manual_repeat)
            else:
                self.game.make_action(action, frame_skip)

        # generate buffers (state is None in a final state)
        game_state = self.game.get_state()
        if game_state is not None:
            self._screen_buffer = game_state.screen_buffer
            self._depth_buffer = game_state.depth_buffer
            self._labels_buffer = game_state.labels_buffer
            self._labels = game_state.labels

        # update game variables / statistics rewards
        self.update_game_variables()
        self.update_statistics(action)

    def close(self):
        """
        Close the current game.
        """
        self.game.close()

    def print_statistics(self, eval_time=None):
        """
        Print agent statistics for the (single) played map.
        `eval_time` is currently unused.
        """
        map_ids = self.statistics.keys()
        assert len(map_ids) == 1
        for v in self.statistics.values():
            assert set(self.stat_keys) == set(v.keys())

        # number of frags (kills - suicides)
        # 100% accurate if the number of frags is given by 'FRAGCOUNT'
        # almost 100% accurate if it is based on an internal ACS variable
        for v in self.statistics.values():
            v['frags'] = v['kills'] - v['suicides']

        # Kills / Deaths
        # 100% accurate if the number of kills is given by an ACS variable
        # almost 100% accurate if it is based on 'FRAGCOUNT'
        for v in self.statistics.values():
            v['k/d'] = v['kills'] * 1.0 / max(1, v['deaths'])

        print("******************Game statistics summary********************")
        print("Map%02d" % self.map_id)
        for item in self.stat_keys:
            print(item + ":\t%d" % self.statistics[self.map_id][item])

    def observe_state(self, params, last_states):
        """
        Observe the current state of the game.

        Appends a new GameState to `last_states` (mutated in place) and
        keeps its length equal to params.hist_size.
        """
        # read game state
        screen, game_features = process_buffers(self, params)
        variables = [self.properties[x[0]] for x in params.game_variables]
        last_states.append(GameState(screen, variables, game_features))

        # update most recent states
        if len(last_states) == 1:
            # first observation: replicate it to fill the history
            last_states.extend([last_states[0]] * (params.hist_size - 1))
        else:
            assert len(last_states) == params.hist_size + 1
            del last_states[0]

        # return the screen and the game features
        return screen, game_features
class DoomEnv(gym.Env, EzPickle):
    """Gym environment wrapping a ViZDoom `DoomGame` instance.

    Observations are either the raw game variables (`obs_type='ram'`) or the
    RGB screen buffer (`obs_type='image'`); actions are binary button
    combinations. The underlying game is (re)initialized lazily on `reset()`.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level='deathmatch', obs_type='ram'):
        """Create the environment for `level` with the given observation type.

        level: scenario name (an optional file extension is stripped).
        obs_type: 'ram' for game variables, 'image' for screen pixels.
        """
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)

        self.curr_seed = 0
        self.game = DoomGame()
        # process-wide lock: ViZDoom init/close is not safe to run concurrently
        self.lock = (DoomLock()).get_lock()

        self.level = level
        self.obs_type = obs_type
        self.tick = 4  # frames each action is repeated for (frame skip)
        self._mode = 'algo'  # 'algo' or 'human'

        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False

        self.viewer = None

        self.set_game(self.level, resolution=None, render=True)
        print()

    # todo: add frame skip option by using tick
    def step(self, action):
        """Repeat `action` for `self.tick` frames.

        Returns the usual gym 4-tuple (observation, reward, done, info).
        In non-'algo' modes the action is ignored and the reward stays 0.
        """
        reward = 0.0
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)
        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        """(Re)initialize the game if needed, start an episode, return obs."""
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()
        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        """Render the current frame.

        mode='rgb_array' returns the image; mode='human' displays it in a
        SimpleImageViewer window. `close=True` in kwargs tears the viewer down.
        """
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()
        if mode == 'rgb_array':
            return img
        # BUG FIX: was `mode is 'human'` — identity comparison against a
        # string literal is implementation-dependent; use equality instead.
        elif mode == 'human':
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        """Close the underlying game (serialized through the shared lock)."""
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        """Hash `seed` into a 32-bit game seed; return it in a list (gym API)."""
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # ================================== GETTERS SETTERS ===============================================================
    def set_game(self, level, resolution, render):
        """Configure the game: level, resolution, spaces, window/mode."""
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        """Load scenario/config for `level` (or the current one) into the game."""
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False

        if self.is_level_loaded:
            return

        # switching levels requires a fresh DoomGame instance
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()

        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              Config.DOOM_SETTINGS[self.level][Config.CONFIG])
        self.game.set_doom_scenario_path(
            Config.VIZDOOM_SCENARIO_PATH +
            Config.DOOM_SETTINGS[self.level][Config.SCENARIO])
        if Config.DOOM_SETTINGS[self.level][Config.MAP] != '':
            self.game.set_doom_map(
                Config.DOOM_SETTINGS[self.level][Config.MAP])
        self.game.set_doom_skill(
            Config.DOOM_SETTINGS[self.level][Config.DIFFICULTY])
        self.allowed_actions = Config.DOOM_SETTINGS[self.level][Config.ACTIONS]
        self.available_game_variables = Config.DOOM_SETTINGS[self.level][
            Config.GAME_VARIABLES]
        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        """Validate and apply the screen resolution (e.g. '160x120')."""
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by Vizdoom.\n The list of valid'
                'resolutions: {}'.format(resolution, resolutions))
        # accept both 'res_160x120' and '160x120' forms
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution,
                    'RES_{}X{}'.format(self.scr_width, self.scr_height)))
        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        """Build gym observation/action spaces and dummy fallback buffers."""
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0,
                high=255,
                dtype=np.uint8,
                shape=(len(self.available_game_variables), ))
        elif self.obs_type == 'image':
            self.observation_space = spaces.Box(
                low=0,
                high=255,
                shape=(self.scr_height, self.scr_width, 3),
                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self.obs_type))

        # dummy buffers are returned when the game state is unavailable
        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(shape=(3, self.scr_height,
                                                self.scr_width),
                                         dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(shape=(self.scr_height,
                                                self.scr_width, 3),
                                         dtype=np.uint8)
        self.dummy_ram = [0] * len(self.available_game_variables)

        # one action code per combination of available buttons
        self.available_action_codes = [
            list(a) for a in it.product(
                [0, 1], repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete(
            [len(self.available_action_codes)])

    def __set_player(self, render=True):
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        """Initialize the game under the shared lock."""
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        """Start a new episode, applying any pending seed / recording path."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()
        return

    def getState(self):
        return self.game.get_state()

    def getLastAction(self):
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        """Return a dict with the level, total reward and all game variables."""
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }
        state_variables = self.get_ram()
        for i in range(len(self.available_game_variables)):
            info[self.available_game_variables[i]] = state_variables[i]
        return info

    def get_ram(self):
        """Return the game variables, or dummy values at episode boundaries."""
        if not self.is_game_initialized:
            raise NotImplementedError(
                "The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            # getState() returned None (episode over) — fall back to dummies
            ram = self.dummy_ram
        return ram

    def get_image(self):
        """Return the screen buffer as HxWx3, or a dummy screen if unavailable."""
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        return (self.game.is_episode_finished() or self.game.is_player_dead()
                or self.getState() is None)

    # =========================================== ==============================================================
    def invert_screen(self, img):
        """Move a channels-first buffer to channels-last when needed."""
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        """Drop action codes that press contradictory buttons simultaneously."""
        if self._mode == 'human':
            return
        action_codes_copy = self.available_action_codes.copy()
        print("Initial actions size: " + str(len(action_codes_copy)))
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)
            if all(elem in ac_names
                   for elem in ['MOVE_LEFT', 'MOVE_RIGHT']) or all(
                       elem in ac_names
                       for elem in ['MOVE_BACKWARD', 'MOVE_FORWARD']) or all(
                           elem in ac_names
                           for elem in ['TURN_RIGHT', 'TURN_LEFT']) or all(
                               elem in ac_names for elem in
                               ['SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON']):
                action_codes_copy.remove(action)
        print("Final actions size: " + str(len(action_codes_copy)))
        self.available_action_codes = action_codes_copy

    def __initHumanPlayer(self):
        """Reconfigure the game for interactive (spectator) play."""
        self._mode = 'human'
        self.__load_level()

        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False

        self.__init_game()

    def advanceAction(self, tick=0):
        """Advance the game by `tick` frames (or one step if tick is 0)."""
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        """Interactive loop: let a human play while printing state info."""
        self.__initHumanPlayer()

        while not self.game.is_episode_finished(
        ) and not self.game.is_player_dead():
            self.advanceAction()

            state = self.getState()
            if state is None:
                if self.record_file_path is None:
                    self.game.new_episode()
                else:
                    self.game.new_episode(self.record_file_path)
                state = self.getState()

            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)

            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames

        print('===============================')
        print('Done')
        return
class Game(object):
    """High-level wrapper around a ViZDoom deathmatch game.

    Manages game initialization, actions (including automatic weapon
    selection and optional manual-control takeover), rewards via a
    `RewardBuilder`, and per-map statistics.
    """

    def __init__(self,
                 scenario,
                 action_builder,
                 reward_values=None,
                 score_variable='FRAGCOUNT',
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 labels_mapping='',
                 game_features='',
                 mode='PLAYER',
                 player_rank=0,
                 players_per_game=1,
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 freelook=False,
                 name='Arnold',
                 color=0,
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.
        score_variable: indicates in which game variable the user score is
            stored. by default it's in FRAGCOUNT, but the score in ACS against
            built-in AI bots can be stored in USER1, USER2, etc.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
               5 - light brown, 6 - light red, 7 - light blue
        """
        # game resources
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario_path = os.path.join(RESOURCES_DIR, 'scenarios',
                                          '%s.wad' % scenario)
        self.game_path = os.path.join(RESOURCES_DIR, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0 and color in range(8)
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # action builder
        self.action_builder = action_builder

        # add the score variable to the game variables list
        self.score_variable = score_variable
        game_variables.append(('score', getattr(GameVariable, score_variable)))

        self.player_rank = player_rank
        self.players_per_game = players_per_game

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.labels_mapping = parse_labels_mapping(labels_mapping)
        self.game_features = parse_game_features(game_features)
        self.use_labels_buffer = self.labels_mapping is not None
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # respawn invincibility / distance
        self.respawn_protect = respawn_protect
        self.spawn_farthest = spawn_farthest

        # freelook / agent name / agent color
        self.freelook = freelook
        self.name = name.strip()
        self.color = color

        # window visibility
        self.visible = visible

        # actor reward
        self.reward_builder = RewardBuilder(self, reward_values)

        # game statistics
        self.stat_keys = [
            'kills', 'deaths', 'suicides', 'frags', 'k/d', 'medikits',
            'armors', 'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
            'plasmarifle', 'bfg9000', 'bullets', 'shells', 'rockets', 'cells'
        ]
        self.statistics = {}

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill
        self.doom_skill = doom_skill

        # manual control
        self.count_non_forward_actions = 0
        self.count_non_turn_actions = 0

    def update_game_variables(self):
        """
        Check and update game variables.
        Reads all registered game variables, sanity-checks them, and rotates
        `self.properties` into `self.prev_properties`.
        """
        # read game variables
        new_v = {k: self.game.get_game_variable(v) for k, v in game_variables}
        # everything except positions (_x/_y/_z) should be integral
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }

        health = new_v['health']
        armor = new_v['armor']
        sel_weapon = new_v['sel_weapon']
        sel_ammo = new_v['sel_ammo']
        bullets = new_v['bullets']
        shells = new_v['shells']
        rockets = new_v['rockets']
        cells = new_v['cells']
        fist = new_v['fist']
        pistol = new_v['pistol']
        shotgun = new_v['shotgun']
        chaingun = new_v['chaingun']
        rocketlauncher = new_v['rocketlauncher']
        plasmarifle = new_v['plasmarifle']
        bfg9000 = new_v['bfg9000']

        # check game variables
        # -1 occasionally comes back from the engine; coerce to sane defaults
        if sel_weapon == -1:
            logger.warning("SELECTED WEAPON is -1!")
            new_v['sel_weapon'] = 1
            sel_weapon = 1
        if sel_ammo == -1:
            logger.warning("SELECTED AMMO is -1!")
            new_v['sel_ammo'] = 0
            sel_ammo = 0

        assert sel_weapon in range(1, 8), sel_weapon
        assert sel_ammo >= 0, sel_ammo
        assert all(x in [0, 1] for x in [
            fist, pistol, shotgun, chaingun, rocketlauncher, plasmarifle,
            bfg9000
        ])
        # health may go negative on death
        assert 0 <= health <= 200 or health < 0 and self.game.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        assert 0 <= bullets <= 200 and 0 <= shells <= 50
        assert 0 <= rockets <= 50 and 0 <= cells <= 300

        # selected ammo must match the selected weapon's ammo type
        # fist
        if sel_weapon == 1:
            assert sel_ammo == 0
        # pistol
        elif sel_weapon == 2:
            assert pistol and sel_ammo == bullets
        # shotgun
        elif sel_weapon == 3:
            assert shotgun and sel_ammo == shells
        # chaingun
        elif sel_weapon == 4:
            assert chaingun and sel_ammo == bullets
        # rocket launcher
        elif sel_weapon == 5:
            assert rocketlauncher and sel_ammo == rockets
        # plasma rifle
        elif sel_weapon == 6:
            assert plasmarifle and sel_ammo == cells
        # BFG9000
        elif sel_weapon == 7:
            assert bfg9000 and sel_ammo == cells

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_statistics_and_reward(self, action):
        """
        Update statistics of the current game based on the previous and the
        current properties, and create a reward.
        """
        stats = self.statistics[self.map_id]

        # reset reward
        self.reward_builder.reset()

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        # distance — only rewarded when moving straight forward
        moving_forward = action[self.mapping['MOVE_FORWARD']]
        turn_left = action[self.mapping['TURN_LEFT']]
        turn_right = action[self.mapping['TURN_RIGHT']]
        if moving_forward and not (turn_left or turn_right):
            diff_x = self.properties['position_x'] - self.prev_properties[
                'position_x']
            diff_y = self.properties['position_y'] - self.prev_properties[
                'position_y']
            distance = math.sqrt(diff_x**2 + diff_y**2)
            self.reward_builder.distance(distance)

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.reward_builder.kill(d)
            stats['kills'] += d
            for _ in range(int(d)):
                self.log('Kill')

        # death
        if self.game.is_player_dead():
            self.reward_builder.death()
            stats['deaths'] += 1
            self.log('Dead')

        # suicide (frag count decreases on suicide)
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            self.reward_builder.suicide()
            stats['suicides'] += 1
            self.log('Suicide')

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                self.reward_builder.medikit(d)
                stats['medikits'] += 1
            else:
                self.reward_builder.injured(d)
            self.log('%s health (%i -> %i)' % (
                'Found' if d > 0 else 'Lost',
                self.prev_properties['health'],
                self.properties['health'],
            ))

        # found / lost armor (losing armor carries no penalty)
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                self.reward_builder.armor()
                stats['armors'] += 1
            self.log('%s armor (%i -> %i)' % (
                'Found' if d > 0 else 'Lost',
                self.prev_properties['armor'],
                self.properties['armor'],
            ))

        # change weapon
        if self.properties['sel_weapon'] != self.prev_properties['sel_weapon']:
            self.log('Switched weapon: %s -> %s' % (
                WEAPON_NAMES[self.prev_properties['sel_weapon']],
                WEAPON_NAMES[self.properties['sel_weapon']],
            ))

        # found weapon
        for i, weapon in enumerate([
                'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
                'plasmarifle', 'bfg9000'
        ]):
            if self.prev_properties[weapon] == self.properties[weapon]:
                continue
            # assert(self.prev_properties[weapon] == 0 and  # TODO check
            #        self.properties[weapon] == 1)
            self.reward_builder.weapon()
            stats[weapon] += 1
            self.log('Found weapon: %s' % WEAPON_NAMES[i + 1])

        # found / lost ammo
        for ammo in ['bullets', 'shells', 'rockets', 'cells']:
            d = self.properties[ammo] - self.prev_properties[ammo]
            if d != 0:
                if d > 0:
                    self.reward_builder.ammo()
                    stats[ammo] += 1
                else:
                    self.reward_builder.use_ammo()
                self.log('%s ammo: %s (%i -> %i)' %
                         ('Found' if d > 0 else 'Lost', ammo,
                          self.prev_properties[ammo], self.properties[ammo]))

    def log(self, message):
        """
        Log the game event.
        During training, we don't want to display events.
        """
        if self.log_events:
            logger.info(message)

    def start(self,
              map_id,
              episode_time=None,
              manual_control=False,
              log_events=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time.
        """
        assert type(manual_control) is bool
        self.manual_control = manual_control

        # Save statistics for this map
        self.statistics[map_id] = {k: 0 for k in self.stat_keys}

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map("map%02i" % map_id)

        # time limit (engine runs at 35 tics per second)
        if episode_time is not None:
            self.game.set_episode_timeout(int(35 * episode_time))

        # log events that happen during the game (useful for testing)
        self.log_events = log_events

        # game parameters
        args = []

        # host / server
        if self.players_per_game > 1:
            port = 5092 + self.player_rank // self.players_per_game
            if self.player_rank % self.players_per_game == 0:
                args.append('-host %i -port %i' %
                            (self.players_per_game, port))
            else:
                args.append('-join 127.0.0.1:%i' % port)
        else:
            args.append('-host 1')

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer
                                            or self.use_game_features)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # respawn invincibility / distance
        # players will be invulnerable for two second after spawning
        # players will be spawned as far as possible from any other players
        args.append('+sv_respawnprotect %i' % self.respawn_protect)
        args.append('+sv_spawnfarthest %i' % self.spawn_farthest)

        # freelook / agent name / agent color
        args.append('+freelook %i' % (1 if self.freelook else 0))
        args.append('+name %s' % self.name)
        args.append('+colorset %i' % self.color)

        # enable the cheat system (so that we can still
        # send commands to the game in self-play mode)
        args.append('+sv_cheats 1')

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # available buttons
        self.mapping = add_buttons(self.game,
                                   self.action_builder.available_buttons)

        # doom skill (https://zdoom.org/wiki/GameSkill)
        self.game.set_doom_skill(self.doom_skill + 1)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()

    def reset(self):
        """
        Reset the game if necessary. This can be because:
        - we reach the end of an episode (we restart the game)
        - because the agent is dead (we make it respawn)
        """
        self.count_non_forward_actions = 0
        # if the player is dead
        if self.is_player_dead():
            # respawn it (deathmatch mode)
            if self.episode_time is None:
                self.respawn_player()
            # or reset the episode (episode ends when the agent dies)
            else:
                self.new_episode()

        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()

        # deal with a ViZDoom issue
        while self.is_player_dead():
            # BUG FIX: was `self.params.player_rank` — `Game` has no `params`
            # attribute; the rank is stored directly on the instance.
            logger.warning('Player %i is still dead after respawn.' %
                           self.player_rank)
            self.respawn_player()

    def update_bots(self):
        """
        Add built-in AI bots.
        There are two types of AI: built-in AI and ScriptedMarines.
        """
        # only the host takes care of the bots
        if self.player_rank % self.players_per_game != 0:
            return
        if self.use_scripted_marines:
            command = "pukename set_value always 2 %i" % self.n_bots
            self.game.send_game_command(command)
        else:
            self.game.send_game_command("removebots")
            for _ in range(self.n_bots):
                self.game.send_game_command("addbot")

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def new_episode(self):
        """
        Start a new episode.
        """
        assert self.is_episode_finished() or self.is_player_dead()
        self.game.new_episode()
        self.log('New episode')
        self.initialize_game()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.log('Respawn player')
        self.initialize_game()

    def initialize_game(self):
        """
        Initialize the game after the player spawns / respawns.
        Be sure that properties from the previous
        life are not considered in this one.
        """
        # generate buffers
        game_state = self.game.get_state()
        self._screen_buffer = game_state.screen_buffer
        self._depth_buffer = game_state.depth_buffer
        self._labels_buffer = game_state.labels_buffer
        self._labels = game_state.labels

        # actor properties
        self.prev_properties = None
        self.properties = None

        # advance a few steps to avoid bugs due
        # to initial weapon changes in ACS
        self.game.advance_action(SKIP_INITIAL_ACTIONS)
        self.update_game_variables()

        # if there are bots in the game, and if this is a new game
        self.update_bots()

    def randomize_textures(self, randomize):
        """
        Randomize the textures of the map.
        """
        assert type(randomize) is bool
        randomize = 1 if randomize else 0
        self.game.send_game_command("pukename set_value always 4 %i" %
                                    randomize)

    def init_bots_health(self, health):
        """
        Initial bots health.
        """
        assert self.use_scripted_marines or health == 100
        assert 0 < health <= 100
        self.game.send_game_command("pukename set_value always 5 %i" % health)

    def make_action(self, action, frame_skip=1, sleep=None):
        """
        Make an action.
        If `sleep` is given, the network will wait
        `sleep` seconds between each action.
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # select agent favorite weapon
        for weapon_name, weapon_ammo, weapon_id in WEAPONS_PREFERENCES:
            min_ammo = 40 if weapon_name == 'bfg9000' else 1
            if self.properties[weapon_name] > 0 and self.properties[
                    weapon_ammo] >= min_ammo:
                if self.properties['sel_weapon'] != weapon_id:
                    switch_action = (
                        [False] *
                        self.mapping['SELECT_WEAPON%i' % weapon_id]) + [True]
                    action = action + switch_action[len(action):]
                    self.log("Manual weapon change: %s -> %s" %
                             (WEAPON_NAMES[self.properties['sel_weapon']],
                              weapon_name))
                break

        # track how long the agent has gone without moving forward / turning
        if action[self.mapping['MOVE_FORWARD']]:
            self.count_non_forward_actions = 0
        else:
            self.count_non_forward_actions += 1
        if action[self.mapping['TURN_LEFT']] or action[
                self.mapping['TURN_RIGHT']]:
            self.count_non_turn_actions = 0
        else:
            self.count_non_turn_actions += 1

        # take over with a scripted action if the agent looks stuck
        if self.manual_control and (self.count_non_forward_actions >= 30
                                    or self.count_non_turn_actions >= 60):
            manual_action = [False] * len(action)
            manual_action[self.mapping['TURN_RIGHT']] = True
            manual_action[self.mapping['SPEED']] = True
            if self.count_non_forward_actions >= 30:
                manual_action[self.mapping['MOVE_FORWARD']] = True
            manual_repeat = 40
            self.count_non_forward_actions = 0
            self.count_non_turn_actions = 0
        else:
            manual_action = None

        # if we are visualizing the experiment, show all the frames one by one
        if self.visible:
            if manual_action is not None:
                logger.warning('Activated manual control')
                for _ in range(manual_repeat):
                    self.game.make_action(manual_action)
            else:
                for _ in range(frame_skip):
                    self.game.make_action(action)
                    # death or episode finished
                    if self.is_player_dead() or self.is_episode_finished():
                        break
                    # sleep for smooth visualization
                    if sleep is not None:
                        time.sleep(sleep)
        else:
            if manual_action is not None:
                logger.warning('Activated manual control')
                self.game.make_action(manual_action, manual_repeat)
            else:
                self.game.make_action(action, frame_skip)

        # generate buffers
        game_state = self.game.get_state()
        if game_state is not None:
            self._screen_buffer = game_state.screen_buffer
            self._depth_buffer = game_state.depth_buffer
            self._labels_buffer = game_state.labels_buffer
            self._labels = game_state.labels

        # update game variables / statistics rewards
        self.update_game_variables()
        self.update_statistics_and_reward(action)

    @property
    def reward(self):
        """
        Return the reward value.
        """
        return self.reward_builder.reward

    def close(self):
        """
        Close the current game.
        """
        self.game.close()

    def print_statistics(self, eval_time=None):
        """
        Print agent statistics.
        If `map_id` is given, statistics are given for the specified map only.
        Otherwise, statistics are given for all maps, with a summary.
        """
        if 'all' in self.statistics:
            del self.statistics['all']
        map_ids = sorted(self.statistics.keys())
        if len(map_ids) == 0:
            logger.info("No statistics to show!")
            return
        for v in self.statistics.values():
            assert set(self.stat_keys) == set(v.keys())

        # sum the results on all maps for global statistics
        self.statistics['all'] = {
            k: sum(v[k] for v in self.statistics.values())
            for k in self.stat_keys
        }

        # number of frags (kills - suicides)
        # 100% accurate if the number of frags is given by 'FRAGCOUNT'
        # almost 100% accurate if it is based on an internal ACS variable
        for v in self.statistics.values():
            v['frags'] = v['kills'] - v['suicides']

        # number of frags per minutes (with and without respawn time)
        if eval_time is not None:
            assert eval_time % 60 == 0
            for k, v in self.statistics.items():
                eval_minutes = eval_time / 60
                if k == 'all':
                    eval_minutes *= (len(self.statistics) - 1)
                respawn_time = (v['deaths'] * RESPAWN_SECONDS * 1.0 / 60)
                v['frags_pm'] = v['frags'] * 1.0 / eval_minutes
                v['frags_pm_r'] = v['frags'] * 1.0 / (eval_minutes +
                                                      respawn_time)

        # Kills / Deaths
        # 100% accurate if the number of kills is given by an ACS variable
        # almost 100% accurate if it is based on 'FRAGCOUNT'
        for v in self.statistics.values():
            v['k/d'] = v['kills'] * 1.0 / max(1, v['deaths'])

        # statistics to log (None inserts a blank line)
        log_lines = [
            [''] + ['Map%02i' % i for i in map_ids] + ['All'],
            ('Kills', 'kills'),
            ('Deaths', 'deaths'),
            ('Suicides', 'suicides'),
            ('Frags', 'frags'),
            ('Frags/m', 'frags_pm'),
            ('Frags/m (r)', 'frags_pm_r'),
            ('K/D', 'k/d'),
            None,
            ('Medikits', 'medikits'),
            ('Armors', 'armors'),
            ('SuperShotgun', 'shotgun'),
            ('Chaingun', 'chaingun'),
            ('RocketLauncher', 'rocketlauncher'),
            ('PlasmaRifle', 'plasmarifle'),
            ('BFG9000', 'bfg9000'),
            ('Bullets', 'bullets'),
            ('Shells', 'shells'),
            ('Rockets', 'rockets'),
            ('Cells', 'cells'),
        ]

        # only show statistics on all maps if there is more than one map
        if len(map_ids) > 1:
            map_ids.append('all')

        logger.info('*************** Game statistics summary ***************')
        log_pattern = '{: >15}' + ('{: >8}' * len(map_ids))
        for line in log_lines:
            if line is None:
                logger.info('')
            else:
                if type(line) is tuple:
                    assert len(line) == 2
                    name, k = line
                    if k in ['frags_pm', 'frags_pm_r'] and eval_time is None:
                        continue
                    line = ['%s:' % name]
                    line += [self.statistics[map_id][k] for map_id in map_ids]
                else:
                    assert type(line) is list
                    line = line[:len(map_ids) + 1]
                line = ['%.3f' % x if type(x) is float else x for x in line]
                logger.info(log_pattern.format(*line))

    def observe_state(self, params, last_states):
        """
        Observe the current state of the game.

        Mutates `last_states` so it always holds the most recent
        `params.hist_size` states; returns (screen, game_features).
        """
        # read game state
        screen, game_features = process_buffers(self, params)
        variables = [self.properties[x[0]] for x in params.game_variables]
        last_states.append(GameState(screen, variables, game_features))

        # update most recent states
        if len(last_states) == 1:
            # first observation: pad the history with copies of it
            last_states.extend([last_states[0]] * (params.hist_size - 1))
        else:
            assert len(last_states) == params.hist_size + 1
            del last_states[0]

        # return the screen and the game features
        return screen, game_features
class DoomEnv(gym.Env):
    """Gym environment wrapping a single VizDoom level.

    Two run modes are supported (``self._mode``):
      - 'algo'  : headless play driven by ``step()``/``reset()``.
      - 'human' : a visible spectator window driven by the keyboard.

    A multiprocessing lock (``self.lock``) guards ``DoomGame.init()`` /
    ``close()`` because VizDoom cannot initialise concurrently across
    processes.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level):
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self._mode = 'algo'  # 'algo' or 'human'
        self.no_render = False  # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False  # Indicates that reset() has been called
        self.curr_seed = 0
        self.lock = (DoomLock()).get_lock()
        # self.action_space = spaces.Discrete(43)  # used to be in the old code
        self.action_space = spaces.MultiBinary(NUM_ACTIONS)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self.screen_height = 120
        self.screen_width = 160
        self.screen_resolution = ScreenResolution.RES_160X120
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_height,
                                                   self.screen_width, 3),
                                            dtype=np.uint8)
        self.seed()
        self._configure()

    def _configure(self, lock=None, **kwargs):
        """Accept an optional multiprocessing lock; other kwargs are deprecated."""
        if 'screen_resolution' in kwargs:
            logger.warn(
                'Deprecated - Screen resolution must now be set using a wrapper. See documentation for details.'
            )
        # Multiprocessing lock
        if lock is not None:
            self.lock = lock

    def _load_level(self):
        """(Re)initialise the underlying DoomGame for ``self.level``.

        Returns the first screen buffer (algo mode) or a zero image after the
        interactive session ends (human mode).
        """
        # Closing if is_initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customizing level
        if getattr(self, '_customize_game', None) is not None and callable(
                self._customize_game):
            self.level = -1
            self._customize_game()
        else:
            # Loading Paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common settings
            self.game.load_config(
                os.path.join(self.doom_dir,
                             'assets/%s' % DOOM_SETTINGS[self.level][CONFIG]))
            self.game.set_doom_scenario_path(
                self.loader.get_scenario_path(
                    DOOM_SETTINGS[self.level][SCENARIO]))
            if DOOM_SETTINGS[self.level][MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in DOOM_SETTINGS[
                        self.level][CONFIG].lower():
                    if 'fix' in DOOM_SETTINGS[self.level][SCENARIO].lower():
                        # mapId = 'map%02d'%np.random.randint(1, 23)
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(
                            1, RANDOMIZE_MAPS + 1)
                    print(
                        '\t=> Special Config: Randomly Loading Maps. MapID = '
                        + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' +
                          DOOM_SETTINGS[self.level][MAP])
                    self.game.set_doom_map(DOOM_SETTINGS[self.level][MAP])
            self.game.set_doom_skill(DOOM_SETTINGS[self.level][DIFFICULTY])
            self.allowed_actions = DOOM_SETTINGS[self.level][ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        # Algo mode
        if 'human' != self._mode:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            # (removed a stray no-op `self.game` expression that was here)
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error(
                    'VizDoom exited unexpectedly. This is likely caused by a missing multiprocessing lock. ' +
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env ' +
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env ' +
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a ' +
                    'singleton lock in memory.')
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().screen_buffer.copy()

        # Human mode
        else:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
            self.no_render = True
            with self.lock:
                self.game.init()
            self._start_episode()
            self.is_initialized = True
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8)

    def _start_episode(self):
        """Begin a new episode, consuming any pending seed exactly once."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()
        return

    def _play_human_mode(self):
        """Let a human play the loaded level, echoing per-step diagnostics."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def old_step(self, action):
        """Legacy interface: ``action`` is a number in range 0..42.

        Transforms a numeric action from space Discrete(43) into an indicator
        action (only one button can be pressed at the same time) and performs
        it, honouring ``self.allowed_actions`` when set.
        """
        # Convert to a one-hot indicator array
        action_arr = np.zeros(NUM_ACTIONS, dtype=int)
        action_arr[action] = 1
        action = action_arr
        assert self.is_initialized, "Doom env not reset, call .reset()"
        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [
                int(action[action_idx]) for action_idx in self.allowed_actions
            ]
        else:
            list_action = [int(x) for x in action]
        return self._make_action(list_action)

    def step(self, action):
        """``action``: iterable of length 43 with per-button press indicators."""
        return self._make_action([int(x) for x in action])

    def _make_action(self, list_action):
        """Execute one VizDoom action and package the gym 4-tuple.

        Returns ``(observation, reward, done, info)``; on a dead VizDoom
        process the episode is reported as finished with a blank frame.
        """
        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                return np.zeros(shape=self.observation_space.shape,
                                dtype=np.uint8), reward, True, info
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
            return state.screen_buffer.copy(), reward, False, info
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8), 0, True, {}

    def reset(self):
        """Restart the episode, (re)loading the level when necessary."""
        if self.is_initialized and not self._closed:
            self._start_episode()
            screen_buffer = self.game.get_state().screen_buffer
            if screen_buffer is None:
                raise error.Error(
                    'VizDoom incorrectly initiated. This is likely caused by a missing multiprocessing lock. ' +
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env ' +
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env ' +
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a ' +
                    'singleton lock in memory.')
            return screen_buffer.copy()
        else:
            return self._load_level()

    def render(self, mode='human', close=False):
        """Render the current frame as an rgb array or into a viewer window."""
        if close:
            if self.viewer is not None:
                self.viewer.close()
                # If we don't None out this reference pyglet becomes unhappy
                self.viewer = None
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.screen_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape,
                               dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':  # fixed: was `mode is 'human'` (identity test)
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8)
        except AttributeError:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8)

    def close(self):
        # Lock required for VizDoom to close processes properly
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        """Store a 32-bit seed to be consumed by the next ``_start_episode``."""
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # Order matches the game_variables layout produced by the level configs.
    _GAME_VARIABLE_NAMES = (
        'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT', 'HEALTH',
        'ARMOR', 'DEAD', 'ON_GROUND', 'ATTACK_READY', 'ALTATTACK_READY',
        'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO', 'AMMO1', 'AMMO2', 'AMMO3',
        'AMMO4', 'AMMO5', 'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0')

    def _get_game_variables(self, state_variables):
        """Map the raw game-variable vector to a labelled info dict."""
        info = {"LEVEL": self.level}
        if state_variables is None:
            return info
        info.update(zip(self._GAME_VARIABLE_NAMES, state_variables))
        return info
class DoomEnvironment:
    """Small DoomGame wrapper producing 84x84 greyscale observations.

    Observations are returned with a trailing channel axis of size one;
    ``step`` returns ``(obs, reward, done)``.
    """

    def __init__(self, scenario, path_to_config="doom/config"):
        self.game = DoomGame()
        base = path_to_config + "/" + scenario
        self.game.load_config(base + ".cfg")
        self.game.set_doom_scenario_path(base + ".wad")
        self.game.set_window_visible(False)
        self.game.init()
        self.num_actions = len(self.game.get_available_buttons())

    def reset(self):
        """Start a new episode and return its first preprocessed frame."""
        self.game.new_episode()
        state = self.game.get_state()
        frame = state.screen_buffer
        # Remember the native resolution; the buffer is channel-first (C, H, W).
        self.h, self.w = frame.shape[1:3]
        self.current_obs = self.preprocess_obs(frame)
        if self.game.get_available_game_variables_size() == 2:
            self.ammo, self.health = state.game_variables
        return self.get_obs()

    def get_obs(self):
        # Append a channel axis: (84, 84) -> (84, 84, 1).
        return self.current_obs[:, :, None]

    def get_obs_rgb(self):
        """Return the current raw frame as an (h, w, 3) uint8 RGB image."""
        frame = self.game.get_state().screen_buffer
        frame = np.rollaxis(frame, 0, 3)
        frame = np.reshape(frame, [self.h, self.w, 3])
        return frame.astype(np.uint8)

    def preprocess_obs(self, obs):
        """Convert a channel-first RGB frame to an 84x84 greyscale uint8 image."""
        rgb = np.rollaxis(obs, 0, 3)
        rgb = np.reshape(rgb, [self.h, self.w, 3]).astype(np.float32)
        # ITU-R 601 luma weights.
        grey = rgb[:, :, 0] * 0.299 + rgb[:, :, 1] * 0.587 + rgb[:, :, 2] * 0.114
        resized = Image.fromarray(grey)
        resized = resized.resize((84, 84), Image.BILINEAR)
        return np.array(resized).astype(np.uint8)

    def action_to_doom(self, a):
        """One-hot encode a discrete action index for DoomGame."""
        one_hot = [0] * self.num_actions
        one_hot[int(a)] = 1
        return one_hot

    def step(self, a):
        """Apply action index ``a``; return (obs, reward, done)."""
        reward = self.game.make_action(self.action_to_doom(a))
        done = self.game.is_episode_finished()
        if done:
            # No frame is available once the episode ends; emit a blank one.
            next_obs = np.zeros_like(self.current_obs, dtype=np.uint8)
        else:
            next_obs = self.preprocess_obs(self.game.get_state().screen_buffer)
        self.current_obs = next_obs
        return self.get_obs(), reward, done

    def watch_random_play(self, max_ep_length=1000, frame_skip=4):
        """Play random actions, rendering every ``frame_skip``-th frame inline."""
        self.reset()
        for t in range(max_ep_length):
            choice = np.random.randint(self.num_actions)
            _, _, done = self.step(choice)
            if done:
                break
            frame = self.get_obs_rgb()
            if t % frame_skip == 0:
                plt.imshow(frame)
                display.clear_output(wait=True)
                display.display(plt.gcf())
def new_episode(game: DoomGame, spawn_point_counter: Dict[int, int],
                n_spawn_points: int) -> None:
    """
    Workaround for improper random number generation with ACS.

    In certain scenarios the agent is spawned at a random spawn point.
    However, instead of this distribution being uniform, one single id is
    heavily preferred. In order to not have the agent encounter too much of
    the same starting points, this method creates new episodes until one is
    found with a different id than the most prominent one.

    :param game: The instance of VizDoom
    :param spawn_point_counter: The dict holding the counts of the previous
        spawn points
    :param n_spawn_points: Number of spawn points in a given scenario
    """
    while True:
        game.new_episode()
        spawn_point = game.get_game_variable(GameVariable.USER1)
        spawn_point %= 21
        # Fix: original tested `spawn_point is math.isnan(spawn_point)`,
        # comparing the value to a boolean by identity (always False);
        # the intent is clearly a NaN check.
        if spawn_point == 0 or math.isnan(spawn_point):
            return  # Spawn point undefined
        # Fix: a first sighting now counts as 1 occurrence (was 0). The shift
        # is uniform across keys, so the argmax used below is unaffected.
        if spawn_point in spawn_point_counter:
            spawn_point_counter[spawn_point] += 1
        else:
            spawn_point_counter[spawn_point] = 1
        # Accept the episode once every spawn point has been seen and this one
        # is not the single most frequent id.
        if (spawn_point != max(spawn_point_counter,
                               key=spawn_point_counter.get)
                and len(spawn_point_counter) >= n_spawn_points):
            return
class DoomEnvironment:
    """VizDoom wrapper exposing a discrete action set with frame skipping.

    The discrete action table enumerates every binary combination of the
    scenario's available buttons.
    """

    def __init__(self, config, visible, skiprate):
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_window_visible(visible)
        self._game.set_mode(Mode.PLAYER)
        self._game.init()
        n_buttons = self._game.get_available_buttons_size()
        # One discrete action per binary button combination.
        self._actions = [list(combo)
                         for combo in it.product([0, 1], repeat=n_buttons)]
        self._skiprate = skiprate

    def make_visible(self):
        """Reopen the game with a visible window in async mode (for watching)."""
        self._game.close()
        self._game.set_window_visible(True)
        self._game.set_mode(Mode.ASYNC_PLAYER)
        self._game.init()

    def get_n_buttons(self):
        return self._game.get_available_buttons_size()

    def observe(self):
        """Return the current (screen_buffer, game_variables) pair."""
        state = self._game.get_state()
        return state.screen_buffer, state.game_variables

    def step(self, action_id):
        """Takes id of single action and performs it for self.skiprate frames

        :param action_id: index of action to perform
        :return: reward, is_done
        """
        total = self._game.make_action(self._actions[action_id],
                                       self._skiprate)
        return total, self._game.is_episode_finished()

    def advance_action_step(self, action_id):
        """Takes id of single action and performs it for self.skiprate frames
        and renders every frame

        :param action_id: index of action to perform
        :return: is_done
        """
        total = 0.0
        for _ in range(self._skiprate):
            total += self._game.make_action(self._actions[action_id])
            # it is vital to break if done for correct reward shaping
            if self._game.is_episode_finished():
                break
        return total, self._game.is_episode_finished()

    def reset(self):
        self._game.new_episode()

    def get_episode_reward(self):
        """Careful! Returns ___non-shaped___ episode reward"""
        return self._game.get_total_reward()
class VizDoomEnv(gym.Env):
    """Minimal gym-style wrapper around a single VizDoom scenario.

    Actions are indices into a table enumerating every binary combination of
    the scenario's buttons; each action is repeated ``repeat_action`` tics.
    """

    def __init__(self, config='my_way_home.cfg', repeat_action=1,
                 render=False):
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_mode(Mode.PLAYER)
        self._game.set_screen_format(ScreenFormat.GRAY8)
        self._game.set_screen_resolution(ScreenResolution.RES_640X480)
        self._game.set_window_visible(render)
        self._game.init()
        self._actions = self._get_actions()
        self._repeat_action = repeat_action
        self._is_rendered = False

    def _get_actions(self):
        # One list entry per binary combination of the available buttons.
        n_buttons = self._game.get_available_buttons_size()
        return [list(combo)
                for combo in itertools.product([False, True],
                                               repeat=n_buttons)]

    def _get_observation(self):
        state = self._game.get_state()
        # VizDoom yields no state once the episode has finished.
        return None if state is None else state.screen_buffer

    def _get_terminal(self):
        return self._game.is_episode_finished()

    def reset(self):
        """Start a new episode and return its first frame."""
        self._game.new_episode()
        return self._get_observation()

    def step(self, action):
        """Apply the indexed action; return (obs, reward, done, info)."""
        reward = self._game.make_action(self._actions[action],
                                        self._repeat_action)
        return self._get_observation(), reward, self._get_terminal(), []

    def render(self, mode='human'):
        self._game.set_window_visible(True)

    def close(self):
        self._game.close()
class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """

    def __init__(self,
                 cfg_path,
                 number_maps,
                 scaled_resolution=(42, 42),
                 action_frame_repeat=4,
                 clip=(-1, 1),
                 seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation
            to be returned with each step
        :param action_frame_repeat: how many game tics should an action be active
        :param clip: how much the reward returned on each step should be clipped to
        :param seed: seed for random, used to determine the other that the doom
            maps should be shown.
        :param data_augmentation: bool to determine whether or not to use data
            augmentation (adding randomly colored, randomly sized boxes to observation)
        """
        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)
        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        # Fix: `if seed:` silently ignored seed=0; test against None instead.
        if seed is not None:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of VizDoom game, initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a seperate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(available_actions) -
                    set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        rows = scaled_resolution[1]
        columns = scaled_resolution[0]
        self.observation_space = gym.spaces.Box(0.0,
                                                255.0,
                                                shape=(columns, rows, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the vizdoom environment observation numpy array into the
        desired resolution and shape
        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape
        # PIL resize has indexing opposite to numpy array
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y
        where N is randomly sampled between 0 and 6
        and x and y are randomly sampled from between 0.1 and 0.35
        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)
        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim
            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.

        :param img: image to resize
        :param shape: desired shape in the format (rows, columns)
        :return: resized image
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'
                             ' Install either '
                             'OpenCV or Pillow to support image processing.')
        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)
        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))
        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in the
            format (rows, columns, channels)
        """
        # Fix: was `self.number_maps is not 0`, an identity test on an int
        # literal that only works via CPython small-int caching.
        if self.number_maps != 0:
            # NOTE(review): map ids generated here run map00..map{n-1};
            # confirm the scenario WAD actually defines a map00.
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def step(self, action):
        """Perform the specified action for the self.action_frame_repeat ticks
        within the environment.

        :param action: the index of the action to perform. The actions
            are specified when the cfg is created. The defaults are
            "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, height, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for the self.action_frame_repeat ticks
        within the environment, saving a frame capture for every pair of tics.

        :param action: the index of the action to perform. The actions
            are specified when the cfg is created. The defaults are
            "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, height, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            if env_state:
                self._rgb_array = self.env.get_state().screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment. For example as used by Stable
        Baselines

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat)
class DoomScenario:
    """
    DoomScenario class runs instances of Vizdoom according to scenario
    configuration (.cfg) files.

    Scenario Configuration files for this project are located in
    the /src/configs/ folder.
    """

    def __init__(self, config_filename):
        '''
        Method initiates Vizdoom with desired configuration file.
        '''
        self.config_filename = config_filename
        self.game = DoomGame()
        self.game.load_config("configs/" + config_filename)
        self.game.set_window_visible(False)
        self.game.init()
        # Native (height, width) of the configured screen buffer.
        self.res = (self.game.get_screen_height(),
                    self.game.get_screen_width())
        # One discrete action per binary combination of available buttons.
        self.actions = [
            list(a) for a in it.product(
                [0, 1], repeat=self.game.get_available_buttons_size())
        ]
        self.pbar = None
        self.game.new_episode()

    def play(self, action, tics):
        '''
        Method advances state with desired action for a number of tics.
        '''
        self.game.set_action(action)
        self.game.advance_action(tics, True)
        if self.pbar:
            self.pbar.update(int(tics))

    def get_processed_state(self, depth_radius, depth_contrast):
        '''
        Method processes the Vizdoom RGB and depth buffer into
        a composite one channel image that can be used by the Models.

        depth_radius defines how far the depth buffer sees with 1.0 being
        as far as ViZDoom allows.

        depth_contrast defines how much of the depth buffer is in the final
        processed image as compared to the greyscaled RGB buffer.
        **processed = (1-depth_contrast)* grey_buffer + depth_contrast*depth_buffer
        '''
        state = self.game.get_state()
        if not self.game.is_episode_finished():
            img = state.screen_buffer  # screen pixels
            screen_buffer = np.array(img).astype('float32') / 255  # (3, 120, 160)

        try:
            # Grey Scaling
            grey_buffer = np.dot(np.transpose(screen_buffer, (1, 2, 0)),
                                 [0.21, 0.72, 0.07])  # (120, 160)

            # Depth Radius
            depth_buffer = np.array(state.depth_buffer).astype('float32') / 255
            depth_buffer[(depth_buffer > depth_radius)] = depth_radius  # Effects depth radius
            depth_buffer_filtered = (depth_buffer - np.amin(depth_buffer)) / (
                np.amax(depth_buffer) - np.amin(depth_buffer))

            # Depth Contrast
            processed_buffer = ((1 - depth_contrast) * grey_buffer) + (
                depth_contrast * (1 - depth_buffer))
            processed_buffer = (processed_buffer - np.amin(processed_buffer)
                                ) / (np.amax(processed_buffer) -
                                     np.amin(processed_buffer))
            processed_buffer = np.round(processed_buffer, 6)
            processed_buffer = processed_buffer.reshape(self.res[-2:])
        except Exception:
            # Fix: was a bare `except:`. This fallback intentionally covers a
            # finished episode (no buffers -> NameError / None) by returning a
            # blank frame, but no longer swallows KeyboardInterrupt/SystemExit.
            processed_buffer = np.zeros(self.res[-2:])

        return processed_buffer  # balance the depth & RGB data

    def run(self, agent, save_replay='', verbose=False, return_data=False):
        '''
        Method runs a instance of DoomScenario.
        '''
        if return_data:
            data_S = []
            data_a = []
        if verbose:
            print("\nRunning Simulation:", self.config_filename)
            self.pbar = tqdm(total=self.game.get_episode_timeout())

        # Initiate New Instance
        self.game.close()
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1 ")
        self.game.init()
        if save_replay != '':
            self.game.new_episode("../data/replay_data/" + save_replay)
        else:
            self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            S = agent.get_state_data(self)
            q = agent.model.online_network.predict(S)
            # Epsilon-soft action selection over the online network's output.
            if np.random.random() < 0.1:
                q = np.random.choice(len(q[0]), 1, p=softmax(q[0], 1))[0]
            else:
                q = int(np.argmax(q[0]))
            a = agent.model.predict(self, q)
            if return_data:
                delta = np.zeros((len(self.actions)))
                a_ = np.cast['int'](a)
                delta[a_] = 1
                data_S.append(S.reshape(S.shape[1], S.shape[2], S.shape[3]))
                data_a.append(delta)
            if not self.game.is_episode_finished():
                self.play(a, agent.frame_skips + 1)
            # Hierarchical models keep replaying the selected skill for a
            # fixed number of extra frames.
            if agent.model.__class__.__name__ == 'HDQNModel' and not self.game.is_episode_finished():
                if q >= len(agent.model.actions):
                    for i in range(agent.model.skill_frame_skip):
                        if not self.game.is_episode_finished():
                            a = agent.model.predict(self, q)
                            self.play(a, agent.frame_skips + 1)
                        else:
                            break

        # Reset Agent and Return Score
        agent.frames = None
        if agent.model.__class__.__name__ == 'HDQNModel':
            agent.model.sub_model_frames = None
        score = self.game.get_total_reward()
        if verbose:
            self.pbar.close()
            print("Total Score:", score)
        if return_data:
            data_S = np.array(data_S)
            data_a = np.array(data_a)
            return [data_S, data_a]
        return score

    def replay(self, filename, verbose=False, doom_like=False):
        '''
        Method runs a replay of the simulations at 800 x 600 resolution.
        '''
        print("\nRunning Replay:", filename)

        # Initiate Replay
        self.game.close()
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.add_game_args("+vid_forcesurface 1")
        if doom_like:
            self.game.set_render_hud(True)
            self.game.set_render_minimal_hud(False)
            self.game.set_render_crosshair(False)
            self.game.set_render_weapon(True)
            self.game.set_render_particles(True)
        self.game.init()
        self.game.replay_episode("../data/replay_data/" + filename)

        # Run Replay
        while not self.game.is_episode_finished():
            if verbose:
                print("Reward:", self.game.get_last_reward())
            self.game.advance_action()

        # Print Score
        score = self.game.get_total_reward()
        print("Total Score:", score)
        self.game.close()

    def apprentice_run(self, test=False):
        '''
        Method runs an apprentice data gathering.
        '''
        # Initiate New Instance
        self.game.close()
        self.game.set_mode(Mode.SPECTATOR)
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.set_ticrate(30)
        self.game.init()
        self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            self.game.advance_action()
        self.game.close()
def train(conf):
    """Train a Double-DQN agent on the 'defend the center' VizDoom scenario.

    :param conf: configuration object; this function reads conf.seed and
        conf.episode (number of episodes to run).
    :return: (steps, scores, total_time, kills, ammos) — per-episode step
        counts, episode scores, wall-clock training time in seconds, and the
        kill/ammo game variables recorded at each episode end.
    """
    # to get total time of training
    start_time = time.time()
    # set the seeds for reproductability
    random.seed(conf.seed)
    np.random.seed(conf.seed)
    tf.set_random_seed(conf.seed)
    # Avoid Tensorflow eats up GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)
    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()
    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc
    action_size = game.get_available_buttons_size()
    img_rows , img_cols = 64, 64
    # Convert image into Black and white
    img_channels = 4  # We stack 4 frames
    state_size = (img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, conf)
    agent.model = Networks.dqn(state_size, action_size, agent.learning_rate)
    agent.target_model = Networks.dqn(state_size, action_size,
                                      agent.learning_rate)
    x_t = game_state.screen_buffer  # 480 x 640
    x_t = preprocessImg(x_t, size=(img_rows, img_cols))
    # Initial state: the first frame repeated 4 times along the channel axis.
    s_t = np.stack(([x_t] * 4), axis=2)  # It becomes 64x64x4
    s_t = np.expand_dims(s_t, axis=0)  # 1x64x64x4
    is_terminated = game.is_episode_finished()
    # Start training
    epsilon = agent.initial_epsilon  # NOTE(review): not read again below;
    # exploration appears to be managed inside the agent itself.
    GAME = 0
    t = 0
    max_life = 0  # Maximum episode life (Proxy for agent performance)
    life = 0
    # Buffer to compute rolling statistics
    life_buffer, ammo_buffer, kills_buffer = [], [], []
    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0
    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])
        # Epsilon Greedy
        action_idx = agent.get_action(s_t)
        a_t[action_idx] = 1
        a_t = a_t.astype(int)
        # Repeat the chosen action for frame_per_action tics.
        r_t = game.make_action(a_t.tolist(), agent.frame_per_action)
        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()
        # print(r_t)
        score += r_t
        step += 1
        if (is_terminated):
            if (life > max_life):
                max_life = life
            GAME += 1
            # `misc` still holds the variables from the last observed state
            # before termination (kills = misc[0], ammo = misc[1]).
            life_buffer.append(life)
            ammo_buffer.append(misc[1])
            kills_buffer.append(misc[0])
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode Finish ", misc)
            # print(scores)
            # Immediately start the next episode and resync the cached state.
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer
            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1
        x_t1 = game_state.screen_buffer
        misc = game_state.game_variables
        x_t1 = preprocessImg(x_t1, size=(img_rows, img_cols))
        x_t1 = np.reshape(x_t1, (1, img_rows, img_cols, 1))
        # Slide the 4-frame window: newest frame + 3 most recent old frames.
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)
        r_t = agent.shape_reward(r_t, misc, prev_misc, t)
        if (is_terminated):
            life = 0
        else:
            life += 1
        # Update the cache
        prev_misc = misc
        # save the sample <s, a, r, s'> to the replay memory and decrease epsilon
        agent.replay_memory(s_t, action_idx, r_t, s_t1, is_terminated, t)
        # Do the training
        if t > agent.observe and t % agent.timestep_per_train == 0:
            Q_max, loss = agent.train_replay()
        s_t = s_t1
        t += 1
        # print info
        state = ""
        if t <= agent.observe:
            state = "observe"
        elif t > agent.observe and agent.epsilon > agent.final_epsilon:
            state = "explore"
        else:
            state = "train"
        if (is_terminated):
            print("TIME", t, "/ GAME", GAME, "/ STATE", state, \
                  "/ EPSILON", agent.epsilon, "/ ACTION", action_idx, "/ REWARD", score, \
                  "/ Q_MAX %e" % np.max(Q_max), "/ LIFE", max_life, "/ LOSS", loss)
        # Save Agent's Performance Statistics
        if GAME % agent.stats_window_size == 0 and t > agent.observe:
            print("Update Rolling Statistics")
            agent.mavg_score.append(np.mean(np.array(life_buffer)))
            agent.var_score.append(np.var(np.array(life_buffer)))
            agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
            agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))
            # Reset rolling stats buffer
            life_buffer, ammo_buffer, kills_buffer = [], [], []
    total_time = time.time() - start_time
    return steps, scores, total_time, kills, ammos
class VizDoomGym(gym.Env):
    """Wraps a VizDoom deathmatch game as a gym environment.

    Not directly constructible: ``__init__`` raises; subclasses are expected to
    call ``_init`` with a mission config and a target frame resolution.
    Observations are RGB frames resized to ``scaled_resolution`` and scaled to
    [0, 1] float32; rewards are shaped from game-variable deltas (kills,
    health, armor, ammo, movement distance) via ``default_reward_values``.
    """

    def __init__(self):
        # Abstract-style guard: subclasses must call _init() themselves.
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        :param mission_file: name of the mission (.cfg) to run,
        :param scaled_resolution: resolution (height, width) of the video frames
                                  to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.mission_file)

        # Hard-coded to deathmatch mode; the flag only gates add_game_args below.
        self.deathmatch = True
        # distance we need the agent to travel per time-step, otherwise we penalise
        self.distance_threshold = 15

        # Game-variable snapshots from the previous and current step, used by
        # update_reward() to compute deltas.
        self.prev_properties = None
        self.properties = None

        # Running kill count across the episode (1-element array).
        self.cum_kills = np.array([0])

        # Create an instace on VizDoom game, initalise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")
        self.env.set_doom_skill(4)
        # Each chosen action is repeated for this many game tics per step.
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a seperate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        # print(available_actions)
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        self.metadata['render_modes'] = ['rgb_array']

        # Allow only one button to be pressed at a given step
        # NOTE(review): the "- 1" reserves the last configured button, which
        # sub_step() always presses (see "ALWAYS SPRINTING") -- confirm the
        # .cfg lists that button last.
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=1.0,
                                                shape=(self.rows, self.columns, 3),
                                                dtype=np.float32)

        # Last raw RGB frame seen; reused as the terminal-step observation.
        self._rgb_array = None
        self.steps = 0         # steps within the current episode
        self.global_steps = 0  # steps across all episodes
        self.reset()

    def _process_image(self, img):
        """Resize a raw frame to (rows, columns) and scale to [0, 1] float32."""
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """ Check and update game variables. """
        # read game variables
        # `game_variables` is a module-level mapping of name -> GameVariable.
        new_v = {
            k: self.env.get_game_variable(v)
            for k, v in game_variables.items()
        }
        # Only position variables (keys ending in _x/_y/_z) may be fractional.
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']
        # check game variables
        # Health may go negative on death; otherwise both are clamped to [0, 200].
        assert 0 <= health <= 200 or health < 0 and self.env.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """ Update reward.

        Computes the shaped reward for the current step from the deltas
        between ``prev_properties`` and ``properties`` plus a living reward.
        """
        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None
        reward = 0
        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']
        # death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']
        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            reward += default_reward_values['SUICIDE']
        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                reward += default_reward_values['MEDIKIT']
            else:
                reward += default_reward_values['INJURED']
        # found / lost armor
        # NOTE(review): unlike health/ammo there is no penalty branch for
        # losing armor -- presumably intentional, confirm.
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                reward += default_reward_values['ARMOR']
        # found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if d != 0:
            if d > 0:
                reward += default_reward_values['AMMO']
            else:
                reward += default_reward_values['USE_AMMO']
        # distance
        # turn_left = (Button.TURN_LEFT == self.available_actions[action])
        # turn_right = (Button.TURN_RIGHT == self.available_actions[action])
        # if not (turn_left or turn_right):
        diff_x = self.properties['position_x'] - self.prev_properties[
            'position_x']
        diff_y = self.properties['position_y'] - self.prev_properties[
            'position_y']
        distance = np.sqrt(diff_x**2 + diff_y**2)
        # Reward movement beyond the threshold, penalise standing still.
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']
        # living
        reward += default_reward_values['LIVING']
        return reward

    # def increase_difficulty(self):
    #     self.curr_skill += 1
    #     self.env.close()
    #     self.env.set_doom_skill(self.curr_skill)
    #     self.env.init()
    #     print('changing skill to', self.curr_skill)

    # def update_map(self):
    #     self.map_level += 1
    #     map_str = 'map0' + str(self.map_level)
    #     # go with initial wad file if there's still maps on it
    #     self.env.close()
    #     self.env.set_doom_map(map_str)
    #     self.env.init()

    def sub_reset(self):
        """Reset environment"""
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        """Reset the environment and return the first processed observation."""
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """Take step"""
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1
        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())
        _ = self.env.make_action(list(one_hot_action), self._action_frame_repeat)

        self.update_game_variables()

        # First step has no previous snapshot to diff against, so reward is 0.
        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        # On the terminal step the previous frame is re-emitted.
        observation = self._process_image(self._rgb_array)
        return observation, reward, done

    def step(self, action):
        """Gym-style step: returns (observation, reward, done, info)."""
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """Seed"""
        # NOTE(review): falsy seeds (0, None) are silently ignored.
        if seed:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError
class VizdoomEnv(gym.Env):
    """Gym adapter around a single ViZDoom scenario.

    ``level`` indexes into the module-level ``CONFIGS`` table, which supplies
    the scenario .cfg filename and the number of discrete actions.
    Observations are HWC uint8 frames; actions are one-hot button presses.
    """

    def __init__(self, level):
        # Configure and start the underlying ViZDoom game (headless).
        self.game = DoomGame()
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.init()

        self.state = None
        self.viewer = None
        self.action_space = spaces.Discrete(CONFIGS[level][1])
        frame_shape = (self.game.get_screen_height(),
                       self.game.get_screen_width(),
                       self.game.get_screen_channels())
        self.observation_space = spaces.Box(0, 255, frame_shape,
                                            dtype=np.uint8)

    def step(self, action):
        """Apply one discrete action; returns (obs, reward, done, info)."""
        # ViZDoom wants a one-hot button list rather than an action index.
        buttons = np.zeros(self.action_space.n)
        buttons[action] = 1
        reward = self.game.make_action(np.uint8(buttons).tolist())

        state = self.game.get_state()
        done = self.game.is_episode_finished()
        info = {}
        if done:
            # No frame exists past the episode end: emit zeros and report the
            # episode return the way gym monitors expect it.
            observation = np.uint8(np.zeros(self.observation_space.shape))
            info = {"episode": {"r": self.game.get_total_reward()}}
        else:
            # ViZDoom frames are CHW; gym consumers expect HWC.
            observation = np.transpose(state.screen_buffer, (1, 2, 0))
        return observation, reward, done, info

    def seed(self, seed):
        self.game.set_seed(seed)

    def close(self):
        self.game.close()

    def reset(self):
        """Start a fresh episode and return its first HWC frame."""
        self.game.new_episode()
        self.state = self.game.get_state()
        return np.transpose(self.state.screen_buffer, (1, 2, 0))

    def render(self, mode='human'):
        """Display the current frame; silently a no-op once the episode ends
        (get_state() returns None then, raising AttributeError)."""
        try:
            frame = np.transpose(self.game.get_state().screen_buffer,
                                 [1, 2, 0])
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(frame)
        except AttributeError:
            pass

    @staticmethod
    def get_keys_to_action():
        # you can press only one key at a time!
        return {
            (): 2,
            (ord('a'),): 0,
            (ord('d'),): 1,
            (ord('w'),): 3,
            (ord('s'),): 4,
            (ord('q'),): 5,
            (ord('e'),): 6,
        }
def train(conf):
    """Run a RandomAgent baseline on the defend_the_center scenario.

    Plays ``conf.episode`` episodes, one random action (repeated 4 tics) per
    step, and records per-episode statistics.

    :param conf: configuration object; only ``conf.episode`` (number of
        episodes to play) is read here, the rest is passed to RandomAgent.
    :return: tuple ``(steps, scores, total_time, kills, ammos)`` where each
        list has one entry per finished episode and ``total_time`` is the
        wall-clock duration in seconds.
    """
    # to get total time of training
    start_time = time.time()

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()
    agent = RandomAgent(action_size, conf)
    # Fix: `episode = conf.episode` was assigned twice; keep a single binding.
    episode = conf.episode

    GAME = 0         # number of finished episodes
    t = 0            # global step counter (passed to shape_reward)
    max_life = 0     # Maximum episode life (proxy for agent performance)
    life = 0         # steps survived in the current episode
    scores, steps, kills, ammos = [], [], [], []
    step = 0         # cumulative step count at each episode end
    e = 0
    score = 0
    while e < episode:
        # Build a one-hot button vector for the randomly selected action.
        a_t = np.zeros([action_size])
        action_idx = agent.select_action()
        a_t[action_idx] = 1
        a_t = a_t.astype(int)
        # Repeat the action for 4 tics.
        r_t = game.make_action(a_t.tolist(), 4)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()
        score += r_t
        step += 1

        if is_terminated:
            if life > max_life:
                max_life = life
            GAME += 1
            # Record the last pre-terminal game variables.
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode Finish ", misc)
            game.new_episode()
            game_state = game.get_state()
            scores.append(score)
            score = 0
            steps.append(step)
            e += 1

        # Refresh variables (from the new episode's first state if we just
        # reset) and let the agent reshape the raw reward.
        misc = game_state.game_variables
        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if is_terminated:
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc
        t += 1

    total_time = time.time() - start_time
    return steps, scores, total_time, kills, ammos
def play(self):
    """Run the trained agent greedily (Q-only, no exploration) for 20
    evaluation episodes and print reward statistics.

    Relies on instance state set up elsewhere: ``self.last_state``,
    ``self.last_action``, ``self.state_frames``, ``self.convert_image`` and
    ``self.choose_next_action_only_on_q``.
    """
    # Create DoomGame instance. It will run the game and communicate with you.
    print("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/deepdoomplayer.cfg")
    game.init()
    print("Doom initialized.")

    episodes = 1
    training_steps_per_epoch = 100  # unused here
    sleep_time = 0.100  # real-time pacing between steps (seconds)

    train_episodes_finished = 0
    train_rewards = []

    for epoch in range(episodes):
        train_loss = []  # unused here
        game.new_episode()
        while (train_episodes_finished < 20):
            sleep(sleep_time)
            if game.is_episode_finished():
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                # NOTE(review): this writes `_last_state` but the check below
                # reads `last_state` -- looks like a naming inconsistency, so
                # the frame stack may not actually be cleared between
                # episodes; confirm against the class definition.
                self._last_state = None
                self.last_action[1] = 1
            # first frame must be handled differently
            if self.last_state is None:
                # the _last_state will contain the image data from the last self.state_frames frames
                self.last_state = np.stack(tuple(
                    self.convert_image(game.get_state().image_buffer)
                    for _ in range(self.state_frames)), axis=2)
                continue
            # Act for 7 tics with the currently selected buttons; rewards are
            # scaled down by 100.
            reward = game.make_action(
                DeepDoomPlayer.define_keys_to_action_pressed(
                    self.last_action), 7)
            reward *= 0.01
            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                # Terminal frame: no image available, substitute a blank one.
                terminal = True
                screen_resized_binary = np.zeros((40, 40))
                imagebufferlast = imagebuffer  # unused
            if imagebuffer is not None:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)
            # add dimension
            screen_resized_binary = np.expand_dims(screen_resized_binary,
                                                   axis=2)
            # Slide the frame window: drop the oldest frame, append the newest.
            current_state = np.append(self.last_state[:, :, 1:],
                                      screen_resized_binary, axis=2)
            self.last_state = current_state
            self.last_action = self.choose_next_action_only_on_q()

    print(train_episodes_finished, "training episodes played.")
    print("Training results:")
    train_rewards = np.array(train_rewards)
    print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
          "max:", train_rewards.max(), "min:", train_rewards.min())
    # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
    game.close()
    self._last_state = None
class ViZDoom(Environment):
    """
    ViZDoom environment (https://github.com/mwydmuch/ViZDoom).

    Flattens game variables and all frame buffers into a single float32
    feature vector; actions are one-hot button presses.
    """

    def __init__(self, config_file):
        """
        Initialize ViZDoom environment.

        Args:
            config_file: .cfg file path, which defines how a world works and
            look like (maps)
        """
        self.game = DoomGame()

        # load configurations from file
        self.game.load_config(config_file)
        self.game.init()

        # Feature length is fixed by the initial state's buffer sizes.
        self.state_shape = self.featurize(self.game.get_state()).shape
        self.num_actions = len(self.game.get_available_buttons())

    def __str__(self):
        return 'ViZDoom'

    def states(self):
        """State-space specification (flat float vector)."""
        return dict(type='float', shape=self.state_shape)

    def actions(self):
        """Action-space specification (single discrete choice)."""
        return dict(type='int', shape=(), num_values=self.num_actions)

    def close(self):
        self.game.close()

    def reset(self):
        """Start a new episode and return its featurized first state."""
        self.game.new_episode()
        return self.featurize(self.game.get_state())

    def seed(self, seed):
        """Set the game RNG seed and echo it back.

        Fix: the ViZDoom Python API method is ``set_seed``; the previous
        ``setSeed`` call raised AttributeError at runtime.
        """
        self.game.set_seed(seed)
        return seed

    def featurize(self, state):
        """Concatenate game variables and all buffers into one float32 vector.

        Optional buffers (depth / labels / automap) that are disabled come
        back as None and are zero-filled so the feature length stays constant.
        """
        H = state.screen_buffer.shape[0]
        W = state.screen_buffer.shape[1]
        _vars = state.game_variables.reshape(-1).astype(np.float32)
        _screen_buf = state.screen_buffer.reshape(-1).astype(np.float32)
        if state.depth_buffer is None:
            _depth_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _depth_buf = state.depth_buffer.reshape(-1).astype(np.float32)
        if state.labels_buffer is None:
            _labels_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _labels_buf = state.labels_buffer.reshape(-1).astype(np.float32)
        if state.automap_buffer is None:
            _automap_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _automap_buf = state.automap_buffer.reshape(-1).astype(np.float32)
        return np.concatenate(
            (_vars, _screen_buf, _depth_buf, _labels_buf, _automap_buf))

    def execute(self, action):
        """Apply one action; returns (states, terminal, reward)."""
        one_hot_enc = [0] * self.num_actions
        one_hot_enc[action] = 1
        reward = self.game.make_action(one_hot_enc)
        terminal = self.game.is_episode_finished()
        # NOTE(review): get_state() returns None once the episode finishes,
        # which would make featurize() fail on the terminal step -- confirm
        # the caller resets before reading the terminal state.
        states = self.featurize(self.game.get_state())
        return states, terminal, reward
def play(self):
    """Evaluate the agent greedily (Q-values only) for 20 episodes and print
    reward statistics.

    Duplicate of the other ``play`` method in this file; reads the same
    instance state: ``self.last_state``, ``self.last_action``,
    ``self.state_frames``, ``self.convert_image``,
    ``self.choose_next_action_only_on_q``.
    """
    # Create DoomGame instance. It will run the game and communicate with you.
    print ("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/deepdoomplayer.cfg")
    game.init()
    print ("Doom initialized.")

    episodes = 1
    training_steps_per_epoch = 100  # unused here
    sleep_time = 0.100  # real-time pacing between steps (seconds)

    train_episodes_finished = 0
    train_rewards = []

    for epoch in range(episodes):
        train_loss = []  # unused here
        game.new_episode()
        while(train_episodes_finished < 20 ):
            sleep(sleep_time)
            if game.is_episode_finished():
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                # NOTE(review): writes `_last_state` but the branch below reads
                # `last_state` -- apparent naming inconsistency; the frame
                # stack may never be cleared between episodes. Confirm.
                self._last_state = None
                self.last_action[1] = 1
            # first frame must be handled differently
            if self.last_state is None:
                # the _last_state will contain the image data from the last self.state_frames frames
                self.last_state = np.stack(
                    tuple(self.convert_image(game.get_state().image_buffer)
                          for _ in range(self.state_frames)), axis=2)
                continue
            # Act for 7 tics; rewards are scaled down by 100.
            reward = game.make_action(
                DeepDoomPlayer.define_keys_to_action_pressed(self.last_action), 7)
            reward *= 0.01
            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                # Terminal frame: no image available, substitute a blank one.
                terminal = True
                screen_resized_binary = np.zeros((40,40))
                imagebufferlast = imagebuffer  # unused
            if imagebuffer is not None:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)
            # add dimension
            screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)
            # Slide the frame window: drop oldest frame, append newest.
            current_state = np.append(self.last_state[:, :, 1:],
                                      screen_resized_binary, axis=2)
            self.last_state = current_state
            self.last_action = self.choose_next_action_only_on_q()

    print (train_episodes_finished, "training episodes played.")
    print ("Training results:")
    train_rewards = np.array(train_rewards)
    print ("mean:", train_rewards.mean(), "std:", train_rewards.std(),
           "max:", train_rewards.max(), "min:", train_rewards.min())
    # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
    game.close()
    self._last_state = None
class Vizdoom_env(object):
    """ViZDoom wrapper driven by action *names* (strings) rather than indices.

    Maintains per-episode histories: ``s_h`` (screens), ``a_h`` (actions) and
    ``p_v_h`` (perception vectors). Perception primitives (ISTHERE, INTARGET,
    EXIST ... IN ...) are computed from ViZDoom's labels buffer. Relies on
    module-level tables: FRAME_SKIP, ATTACK_FRAME_SKIP, MONSTER_LIST,
    ITEMS_IN_INTEREST, PLAYER_NAME and the DISTANCE/HORIZONTAL dicts.
    """

    def __init__(self, config='vizdoom_env/asset/default.cfg',
                 verbose=False, perception_type='more_simple'):
        # :param config: ViZDoom .cfg path.
        # :param verbose: show the game window and print every perception.
        # :param perception_type: one of 'clear', 'simple', 'more_simple' or
        #     anything else (falls through to the default dicts).
        self.verbose = verbose

        self.game = DoomGame()
        self.game.load_config(config)
        if self.verbose:
            self.game.set_window_visible(True)
            self.game.set_screen_resolution(ScreenResolution.RES_1280X960)

        self.game_variables = self.game.get_available_game_variables()
        self.buttons = self.game.get_available_buttons()
        # Human-readable names, e.g. Button.ATTACK -> 'ATTACK'.
        self.action_strings = [b.__str__().replace('Button.', '')
                               for b in self.buttons]
        self.game_variable_strings = [v.__str__().replace('GameVariable.', '')
                                      for v in self.game_variables]
        self.perception_type = perception_type
        # NOTE(review): for 'simple'/'more_simple' the distance/horizontal
        # dicts are never set; the simple perception paths below do not read
        # them, but call_all_perception_primitives() would fail. Confirm.
        if perception_type == 'clear':
            self.distance_dict = CLEAR_DISTANCE_DICT
            self.horizontal_dict = CLEAR_HORIZONTAL_DICT
        elif perception_type == 'simple':
            pass
        elif perception_type == 'more_simple':
            pass
        else:
            self.distance_dict = DISTANCE_DICT
            self.horizontal_dict = HORIZONTAL_DICT

    def init_game(self):
        """Start the engine and begin the first episode."""
        self.game.init()
        self.new_episode()

    def new_episode(self, init_state=None):
        """Begin a fresh episode, optionally teleporting actors first.

        :param init_state: optional dict of spawn positions understood by
            ``initialize_state``.
        :raises RuntimeError: if the engine yields no initial state.
        """
        self.game.new_episode()
        if init_state is not None:
            self.initialize_state(init_state)
            # Step once so the teleport commands take effect.
            self.take_action('NONE')
        state = self.game.get_state()
        if state is None:
            raise RuntimeError('Cannot get initial states')
        # ViZDoom frames are CHW; convert to HWC for storage.
        img_arr = np.transpose(state.screen_buffer.copy(), [1, 2, 0])
        self.x_size = img_arr.shape[1]
        self.y_size = img_arr.shape[0]
        self.channel = img_arr.shape[2]
        self.get_state()
        if self.verbose:
            self.call_all_perception_primitives()
        p_v = self.get_perception_vector()
        # Reset episode histories.
        self.s_h = [img_arr.copy()]
        self.a_h = []
        self.p_v_h = [p_v.copy()]  # perception vector

    def end_game(self):
        self.game.close()

    def state_transition(self, action_string):
        """Execute one named action and record screen/perception history.

        FRAME_SKIP[action][2] selects whether the observation is captured
        before (0) or after (1) the post-action NONE padding.

        :raises ValueError: for unknown action names.
        """
        if action_string == 'NONE' or action_string in self.action_strings:
            self.take_action(action_string)
            self.a_h.append(action_string)
            if self.verbose:
                self.print_state()
            if FRAME_SKIP[action_string][2] == 0:
                self.get_state()
                self.s_h.append(self.screen.copy())
                p_v = self.get_perception_vector()
                self.p_v_h.append(p_v.copy())  # perception vector
            self.post_none(action_string)
            if FRAME_SKIP[action_string][2] == 1:
                self.get_state()
                self.s_h.append(self.screen.copy())
                p_v = self.get_perception_vector()
                self.p_v_h.append(p_v.copy())  # perception vector
            if self.verbose:
                self.call_all_perception_primitives()
        else:
            raise ValueError('Unknown action')

    def call_all_perception_primitives(self):
        """Evaluate every perception primitive (for verbose tracing)."""
        for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
            self.in_target(actor)
            for dist in self.distance_dict.keys():
                for horz in self.horizontal_dict.keys():
                    self.exist_actor_in_distance_horizontal(actor, dist, horz)
        for weapon_slot in range(1, 10):
            self.have_weapon(weapon_slot)
            self.have_ammo(weapon_slot)
            self.selected_weapon(weapon_slot)
        for actor in MONSTER_LIST:
            self.is_there(actor)
        self.no_selected_weapon_ammo()

    def take_action(self, action):
        """Press the named button for its configured number of tics."""
        action_vector = [a == action for a in self.action_strings]
        frame_skip = FRAME_SKIP[action][0]
        if action == 'ATTACK':
            # Attack duration depends on the currently selected weapon.
            state = self.game.get_state()
            gv_values = dict(zip(self.game_variable_strings,
                                 state.game_variables))
            weapon_num = int(gv_values['SELECTED_WEAPON'])
            frame_skip = ATTACK_FRAME_SKIP[weapon_num]
        self.game.make_action(action_vector, frame_skip)

    def post_none(self, action):
        """Idle (all buttons released) for the action's padding tics."""
        none_vector = [a == 'NONE' for a in self.action_strings]
        self.game.make_action(none_vector, FRAME_SKIP[action][1])

    def get_action_list(self):
        return self.action_strings

    def init_actors(self):
        # Map of object_name -> list of labels seen in the current state.
        self.actors = {}

    def check_and_add_to_actors(self, actor_name, label):
        if actor_name not in self.actors:
            self.actors[actor_name] = []
        self.actors[actor_name].append(label)

    def get_actor_by_name(self, actor_name):
        # Returns (and lazily creates) the label list for an actor name.
        if actor_name not in self.actors:
            self.actors[actor_name] = []
        return self.actors[actor_name]

    def get_state(self):
        """Snapshot the current engine state into instance attributes."""
        state = self.game.get_state()
        if state is None:
            # Episode over: clear everything.
            # NOTE(review): this overwrites `self.game_variables` (the list of
            # GameVariable enums set in __init__) with an empty dict, while the
            # populated branch writes `self.game_variable_values` -- looks
            # like a naming slip; confirm intent.
            self.game_variables = dict()
            self.player = None
            self.monsters = []
            self.ammo = []
            self.init_actors()
            return
        self.game_variable_values = dict(zip(self.game_variable_strings,
                                             state.game_variables))
        self.monsters = []
        self.ammo = []
        self.weapons = []
        self.actors = {}
        for l in state.labels:
            if l.object_name in PLAYER_NAME:
                self.player = l
            elif l.object_name in MONSTER_LIST:
                self.monsters.append(l)
                self.check_and_add_to_actors(l.object_name, l)
            else:
                self.check_and_add_to_actors(l.object_name, l)
        self.labels = state.labels
        # CHW -> HWC copy of the current frame.
        self.screen = np.transpose(state.screen_buffer, [1, 2, 0]).copy()

    def get_perception_vector_cond(self):
        """Names (strings) of the perception-vector entries, in order."""
        if self.perception_type == 'simple' or \
           self.perception_type == 'more_simple':
            return self.get_perception_vector_cond_simple()
        else:
            return self.get_perception_vector_cond_basic()

    def get_perception_vector_cond_basic(self):
        vec = []
        for dist in self.distance_dict.keys():
            for horz in self.horizontal_dict.keys():
                for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
                    vec.append('EXIST {} IN {} {}'.format(actor, dist, horz))
        for actor in MONSTER_LIST:
            vec.append('INTARGET {}'.format(actor))
        return vec

    def get_perception_vector_cond_simple(self):
        vec = []
        for actor in MONSTER_LIST:
            vec.append('ISTHERE {}'.format(actor))
        # 'more_simple' stops at ISTHERE; 'simple' adds INTARGET entries.
        if self.perception_type == 'more_simple':
            return vec
        for actor in MONSTER_LIST:
            vec.append('INTARGET {}'.format(actor))
        return vec

    def get_perception_vector(self):
        """Boolean vector matching get_perception_vector_cond()'s order."""
        if self.perception_type == 'simple' or\
           self.perception_type == 'more_simple':
            return self.get_perception_vector_simple()
        else:
            return self.get_perception_vector_basic()

    def get_perception_vector_basic(self):
        vec = []
        for dist in self.distance_dict.keys():
            for horz in self.horizontal_dict.keys():
                for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
                    vec.append(self.exist_actor_in_distance_horizontal(
                        actor, dist, horz))
        for actor in MONSTER_LIST:
            vec.append(self.in_target(actor))
        return np.array(vec)

    def get_perception_vector_simple(self):
        vec = []
        for actor in MONSTER_LIST:
            vec.append(self.is_there(actor))
        if self.perception_type == 'more_simple':
            return np.array(vec)
        for actor in MONSTER_LIST:
            vec.append(self.in_target(actor))
        return np.array(vec)

    def print_state(self):
        """Dump game variables and every label to stdout (debugging aid)."""
        state = self.game.get_state()
        if state is None:
            print('No state')
            return
        game_variables = dict(zip(self.game_variable_strings,
                                  state.game_variables))
        game_variable_print = ''
        for key in sorted(game_variables.keys()):
            game_variable_print += '{}: {}, '.format(key, game_variables[key])
        game_variable_print += '\n'
        print(game_variable_print)
        for l in state.labels:
            print("id: {id}, name: {name}, position: [{pos_x},{pos_y},{pos_z}], "
                  "velocity: [{vel_x},{vel_y},{vel_z}], "
                  "angle: [{angle},{pitch},{roll}], "
                  "box: [{x},{y},{width},{height}]\n".format(
                      id=l.object_id, name=l.object_name,
                      pos_x=l.object_position_x, pos_y=l.object_position_y,
                      pos_z=l.object_position_z, vel_x=l.object_velocity_x,
                      vel_y=l.object_velocity_y, vel_z=l.object_velocity_z,
                      angle=l.object_angle, pitch=l.object_pitch,
                      roll=l.object_roll, x=l.x, y=l.y, width=l.width,
                      height=l.height))

    def is_there(self, actor):
        """True if at least one labelled instance of `actor` is visible."""
        if len(self.get_actor_by_name(actor)) > 0:
            if self.verbose:
                print('ISTHERE {}'.format(actor))
            return True
        else:
            return False

    def in_target(self, actor):
        """True if some instance's bounding box covers the screen centre."""
        center_x = self.x_size / 2
        center_y = self.y_size / 2
        for a in self.get_actor_by_name(actor):
            a_x_min, a_x_max = a.x, a.x + a.width
            a_y_min, a_y_max = a.y, a.y + a.height
            if center_x > a_x_min and center_x < a_x_max and\
               center_y > a_y_min and center_y < a_y_max:
                if self.verbose:
                    print('INTARGET {}'.format(actor))
                return True
        return False

    def exist_actor_in_distance_horizontal(self, actor, dist, horz):
        """True if an `actor` instance matches both a distance predicate and a
        horizontal-position predicate (from the configured dicts)."""
        cen_x = self.x_size / 2
        p = self.player
        for a in self.get_actor_by_name(actor):
            a_x_min, a_x_max = a.x, a.x + a.width
            d_x = a.object_position_x - p.object_position_x
            d_y = a.object_position_y - p.object_position_y
            d = math.sqrt(d_x**2 + d_y**2)
            if self.distance_dict[dist](d) and \
               self.horizontal_dict[horz](a_x_min, a_x_max, cen_x):
                if self.verbose:
                    print('EXIST {} in {} {}'.format(actor, dist, horz))
                return True
        return False

    # Weapons
    # 1: Fist, chainsaw, 2: pistol, 3: shotgun, 4: chaingun, 5: rocket launcher, 6: plazma rifle
    # SELECT_WEAPON_1 switch between fist and chainsaw
    def have_weapon(self, weapon_slot):
        """True if the WEAPON<slot> game variable is positive."""
        if self.game_variable_values['WEAPON{}'.format(weapon_slot)] > 0:
            if self.verbose:
                print('Have weapon {}'.format(weapon_slot))
            return True
        return False

    def have_ammo(self, weapon_slot):
        """True if slot 1 (melee, needs no ammo) or AMMO<slot> is positive."""
        if weapon_slot == 1:  # Fist or Chainsaw
            if self.verbose:
                print('Have ammo {}'.format(weapon_slot))
            return True
        if self.game_variable_values['AMMO{}'.format(weapon_slot)] > 0:
            if self.verbose:
                print('Have ammo {}'.format(weapon_slot))
            return True
        return False

    def selected_weapon(self, weapon_slot):
        """True if the given slot is the currently selected weapon."""
        if self.game_variable_values['SELECTED_WEAPON'] == weapon_slot:
            if self.verbose:
                print('Weapon {} is selected'.format(weapon_slot))
            return True
        return False

    def no_selected_weapon_ammo(self):
        """True if the selected weapon has zero ammo left."""
        if self.game_variable_values['SELECTED_WEAPON_AMMO'] == 0:
            if self.verbose:
                print('no selected weapon ammo is left')
            return True
        return False

    def initialize_state(self, init_state):
        """
        Takes random arguments and initialies the state
        Assumes that the max number of monster and ammo spawns is 5
        Params: init_state
        [{"player_pos": [x, y],
          "monster_pos": [[x1, y1], [x2, y2]]}]
        """
        # `puke <script> <x> <y>` runs a numbered map script; the script-id
        # bases (20, 21+, 5+, 15+, 10+) are fixed by the scenario's ACS
        # scripts -- confirm against the .wad.
        if 'player_pos' in init_state:
            x, y = init_state['player_pos']
            self.game.send_game_command('puke 20 {} {}'.format(x, y))
        if 'demon_pos' in init_state:
            for i, (x, y) in enumerate(init_state['demon_pos']):
                self.game.send_game_command(
                    'puke {} {} {}'.format(21 + i, x, y))
        if 'revenant_pos' in init_state:
            for i, (x, y) in enumerate(init_state['revenant_pos']):
                self.game.send_game_command(
                    'puke {} {} {}'.format(5 + i, x, y))
        if 'hellknight_pos' in init_state:
            for i, (x, y) in enumerate(init_state['hellknight_pos']):
                self.game.send_game_command(
                    'puke {} {} {}'.format(15 + i, x, y))
        if 'ammo_pos' in init_state:
            for i, (x, y) in enumerate(init_state['ammo_pos']):
                self.game.send_game_command(
                    'puke {} {} {}'.format(10 + i, x, y))
class ViZDoom(Environment):
    """
    [ViZDoom](https://github.com/mwydmuch/ViZDoom) environment adapter (specification key:
    `vizdoom`).

    Args:
        level (string): ViZDoom configuration file
            (<span style="color:#C00000"><b>required</b></span>).
        include_variables (bool): Whether to include game variables to state
            (<span style="color:#00C000"><b>default</b></span>: false).
        factored_action (bool): Whether to use factored action representation
            (<span style="color:#00C000"><b>default</b></span>: false).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 12).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(self, level, visualize=False, include_variables=False,
                 factored_action=False, frame_skip=12, seed=None):
        from vizdoom import DoomGame, Mode, ScreenFormat, ScreenResolution

        self.config_file = level
        self.include_variables = include_variables
        self.factored_action = factored_action
        self.visualize = visualize
        self.frame_skip = frame_skip

        self.environment = DoomGame()
        self.environment.load_config(self.config_file)
        if self.visualize:
            self.environment.set_window_visible(True)
            self.environment.set_mode(Mode.ASYNC_PLAYER)
        else:
            self.environment.set_window_visible(False)
            self.environment.set_mode(Mode.PLAYER)
        # e.g. CRCGCB, RGB24, GRAY8
        self.environment.set_screen_format(ScreenFormat.RGB24)
        # e.g. RES_320X240, RES_640X480, RES_1920X1080
        self.environment.set_screen_resolution(ScreenResolution.RES_640X480)
        self.environment.set_depth_buffer_enabled(False)
        self.environment.set_labels_buffer_enabled(False)
        self.environment.set_automap_buffer_enabled(False)
        if seed is not None:
            # Fix: the ViZDoom Python API method is set_seed; setSeed does not
            # exist on DoomGame and raised AttributeError.
            self.environment.set_seed(seed)
        self.environment.init()

        # Fix: RGB24 frames are (height, width, channels); at RES_640X480 the
        # screen buffer is 480 rows by 640 columns, not (640, 480, 3).
        self.state_shape = (480, 640, 3)
        self.num_variables = self.environment.get_available_game_variables_size()
        self.num_buttons = self.environment.get_available_buttons_size()
        # Every combination of pressed buttons forms one composite action.
        # Fix: stored as a private attribute -- the old `self.actions` shadowed
        # the actions() method on instances, making env.actions() uncallable.
        self._actions = [
            tuple(a) for a in itertools.product([0, 1], repeat=self.num_buttons)
        ]

    def __str__(self):
        return super().__str__() + '({})'.format(self.config_file)

    def states(self):
        """State-space specification (screen, optionally plus variables)."""
        if self.include_variables:
            return OrderedDict(screen=dict(type='float', shape=self.state_shape),
                               variables=dict(type='float',
                                              shape=self.num_variables))
        else:
            return dict(type='float', shape=self.state_shape)

    def actions(self):
        """Action-space specification (factored bools or one composite int)."""
        if self.factored_action:
            return dict(type='bool', shape=self.num_buttons)
        else:
            return dict(type='int', shape=(), num_values=len(self._actions))

    def close(self):
        self.environment.close()
        self.environment = None

    def get_states(self):
        """Current observation: screen scaled to [0, 1] float32 (+ variables)."""
        state = self.environment.get_state()
        screen = state.screen_buffer.astype(dtype=np.float32) / 255.0
        if self.include_variables:
            return OrderedDict(screen=screen, variables=state.game_variables)
        else:
            return screen

    def reset(self):
        self.environment.new_episode()
        return self.get_states()

    def execute(self, actions):
        """Apply an action for frame_skip tics; returns (states, terminal, reward)."""
        if self.factored_action:
            action = np.where(actions, 1.0, 0.0)
        else:
            action = self._actions[actions]
        if self.visualize:
            # In ASYNC_PLAYER mode, advance tic by tic so the window updates.
            self.environment.set_action(action)
            reward = 0.0
            for _ in range(self.frame_skip):
                self.environment.advance_action()
                reward += self.environment.get_last_reward()
        else:
            reward = self.environment.make_action(action, self.frame_skip)
        terminal = self.environment.is_episode_finished()
        states = self.get_states()
        return states, terminal, reward
class VizDoomEnv(Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.

    Actions are MultiDiscrete button vectors; observations are HWC uint8
    frames (zero-filled once the episode has ended).
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, cfg_name, repeat=1):
        super(VizDoomEnv, self).__init__()
        self.game = DoomGame()
        self.game.load_config('./slm_lab/env/vizdoom/cfgs/' + cfg_name + '.cfg')
        self._viewer = None
        # Fix: honour the caller-supplied frame-repeat; the previous
        # `self.repeat = 1` silently discarded the `repeat` argument.
        self.repeat = repeat
        # TODO In future, need to update action to handle (continuous) DELTA buttons using gym's Box space
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_height(),
                        self.game.get_screen_width(),
                        self.game.get_screen_channels())
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=output_shape, dtype='uint8')
        self.game.init()

    def close(self):
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        self.game.set_seed(seed)

    def step(self, action):
        """Apply a button vector for `repeat` tics; gym 4-tuple return."""
        reward = self.game.make_action(list(action), self.repeat)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        # info = self._get_game_variables(state.game_variables)
        info = {}
        if state is not None:
            # CHW -> HWC for gym consumers.
            observation = state.screen_buffer.transpose(1, 2, 0)
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
        return observation, reward, done, info

    def reset(self):
        """Start a new episode and return its first HWC frame."""
        # self.seed(seed)
        self.game.new_episode()
        return self.game.get_state().screen_buffer.transpose(1, 2, 0)

    def render(self, mode='human', close=False):
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            # Fix: was `mode is 'human'` -- identity comparison with a str
            # literal is implementation-dependent and a SyntaxWarning.
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))

    def _get_game_variables(self, state_variables):
        """Map the scenario's fixed game-variable vector to a labelled dict.

        Assumes the .cfg declares exactly these 22 variables in this order --
        confirm against the scenario config.
        """
        info = {}
        if state_variables is not None:
            info['KILLCOUNT'] = state_variables[0]
            info['ITEMCOUNT'] = state_variables[1]
            info['SECRETCOUNT'] = state_variables[2]
            info['FRAGCOUNT'] = state_variables[3]
            info['HEALTH'] = state_variables[4]
            info['ARMOR'] = state_variables[5]
            info['DEAD'] = state_variables[6]
            info['ON_GROUND'] = state_variables[7]
            info['ATTACK_READY'] = state_variables[8]
            info['ALTATTACK_READY'] = state_variables[9]
            info['SELECTED_WEAPON'] = state_variables[10]
            info['SELECTED_WEAPON_AMMO'] = state_variables[11]
            info['AMMO1'] = state_variables[12]
            info['AMMO2'] = state_variables[13]
            info['AMMO3'] = state_variables[14]
            info['AMMO4'] = state_variables[15]
            info['AMMO5'] = state_variables[16]
            info['AMMO6'] = state_variables[17]
            info['AMMO7'] = state_variables[18]
            info['AMMO8'] = state_variables[19]
            info['AMMO9'] = state_variables[20]
            info['AMMO0'] = state_variables[21]
        return info
class VizDoomEnv(gym.Env):
    '''
    Wrapper for ViZDoom to use as an OpenAI gym environment.

    Maintains a rolling buffer of the last `sequenceLength` preprocessed
    CHW frames; step()/reset() return that buffer (a list of arrays) as
    the observation.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, params):
        '''
        params must expose: scenarioPath (cfg file), frameskip,
        inputShape (CHW tuple), sequenceLength, gameVariables.
        '''
        super(VizDoomEnv, self).__init__()
        self.params = params
        self.game = DoomGame()
        self.game.load_config(params.scenarioPath)
        self._viewer = None
        self.frameskip = params.frameskip
        self.inputShape = params.inputShape
        self.sequenceLength = params.sequenceLength
        # Shape of all stacked frames concatenated along the channel axis.
        self.seqInputShape = (self.inputShape[0] * self.sequenceLength,
                              self.inputShape[1], self.inputShape[2])
        self.gameVariables = params.gameVariables
        self.numGameVariables = len(self.gameVariables)
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        # Native ViZDoom frame layout is CHW.
        output_shape = (self.game.get_screen_channels(),
                        self.game.get_screen_height(),
                        self.game.get_screen_width())
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=output_shape, dtype='uint8')
        self.game.init()
        # Maintain a buffer of last seq len frames.
        self.frameBuffer = [np.zeros(self.inputShape)] * self.sequenceLength

    def close(self):
        '''Shut down the game instance and any open render window.'''
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        '''Seed the underlying Doom RNG.'''
        self.game.set_seed(seed)

    def step(self, action):
        '''Apply `action` with frame-skip; return (frames, reward, done, info).

        NOTE(review): returns the live frameBuffer list (not a copy), and
        `info` is the raw game-variable vector — or None on the terminal
        step. Callers must tolerate both; confirm intended.
        '''
        reward = self.game.make_action(list(action), self.frameskip)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        if state is not None:
            observation = state.screen_buffer
            # Return the chosen game variables in info
            info = state.game_variables
        else:
            # Terminal step has no frame; substitute a black screen.
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
            info = None
        processedObservation = self._preProcessImage(observation)
        # Slide the window: drop the oldest frame, append the newest.
        del self.frameBuffer[0]
        self.frameBuffer.append(processedObservation)
        return self.frameBuffer, reward, done, info

    # Preprocess image for use in network
    def _preProcessImage(self, image):
        '''Resize a CHW frame to self.inputShape if it differs.'''
        if image.shape != self.inputShape:
            # cv2.resize works on HWC with (width, height) dsize, so
            # transpose around the resize.
            image = cv2.resize(
                image.transpose(1, 2, 0),
                (self.inputShape[2], self.inputShape[1]),
                interpolation=cv2.INTER_AREA).transpose(2, 0, 1)
        return image

    def reset(self):
        '''Start a new episode; prime the buffer with copies of the first frame.'''
        self.game.new_episode()
        state = self._preProcessImage(self.game.get_state().screen_buffer)
        self.frameBuffer = [state] * self.sequenceLength
        return self.frameBuffer

    def render(self, mode='human', close=False):
        '''Render the current frame.

        mode='rgb_array' returns the raw CHW frame; mode='human' shows it
        in a SimpleImageViewer window.
        '''
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode there is no frame; show black.
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        # BUG FIX: was `mode is 'human'` — identity comparison against a
        # string literal; use equality.
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))
game.init() # Creates all possible actions depending on how many buttons there are. actions_num = game.get_available_buttons_size() actions = [] for perm in it.product([False, True], repeat=actions_num): actions.append(list(perm)) episodes = 10 sleep_time = 0.028 for i in range(episodes): print("Episode #" + str(i + 1)) # Not needed for the first episdoe but the loop is nicer. game.new_episode() while not game.is_episode_finished(): # Gets the state and possibly to something with it s = game.get_state() img = s.image_buffer misc = s.game_variables # Makes a random action and save the reward. r = game.make_action(choice(actions)) # Makes a "prolonged" action and skip frames: # skiprate = 3 # r = game.make_action(choice(actions), skiprate) # The same could be achieved with:
def start(self):
    """Main DQN-style training loop for the Doom agent.

    Plays episodes with the ViZDoom engine, stores
    (state, action, reward, next_state, terminal) transitions in the
    replay buffer (self.observations), trains once enough transitions
    have been observed, and linearly anneals the epsilon-greedy
    exploration probability. Saves TF checkpoints after epoch 20.
    """
    # Create DoomGame instance. It will run the game and communicate with you.
    print ("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/learningtensorflow.cfg")
    game.init()
    print ("Doom initialized.")
    train_rewards = []
    for epoch in range(DeepDoom.episodes):
        print ("\nEpoch", epoch)
        train_time = 0
        train_episodes_finished = 0
        # NOTE(review): train_loss is never appended to, so the
        # np.mean(train_loss) below is NaN and mean_loss is unused.
        train_loss = []
        # start saving checkpoints after epoch 20
        if epoch > 20:
            if not os.path.exists(DeepDoom.checkpoint_path):
                os.mkdir(DeepDoom.checkpoint_path)
            self.saver.save(self.session, DeepDoom.checkpoint_path, global_step=epoch )
        train_start = time()
        game.new_episode()
        for learning_step in tqdm(range(DeepDoom.training_steps_per_epoch)):
            if game.is_episode_finished():
                # Episode over: record its total reward and restart.
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                self.last_state = None
                #sleep(sleep_time)
            # first frame must be handled differently
            if self.last_state is None:
                # the last_state will contain the image data from the
                # last self.state_frames frames (first frame duplicated).
                self.last_state = np.stack(tuple(self.convert_image(game.get_state().image_buffer) for _ in range(self.state_frames)), axis=2)
                continue
            # Repeat the chosen action for 7 tics; scale the reward down.
            reward = game.make_action(DeepDoom.define_keys_to_action_pressed(self.last_action), 7)
            reward *= 0.01
            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                # Terminal frame: engine supplies no image, use a black
                # 40x40 placeholder. Presumably 40x40 matches
                # convert_image's output size — TODO confirm.
                terminal = True
                screen_resized_binary = np.zeros((40,40))
                imagebufferlast = imagebuffer  # NOTE(review): never read afterwards
            if imagebuffer is not None:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)
            # add dimension so the frame can be appended along axis 2
            screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)
            # Slide the frame window: drop the oldest, append the newest.
            current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)
            self.observations.append((self.last_state, self.last_action, reward, current_state, terminal))
            if len(self.observations) > self.memory_size:
                self.observations.popleft()
            #sleep(sleep_time)
            # only train if done observing
            if len(self.observations) > self.observation_steps:
                self.train()
                self.time += 1
            self.last_state = current_state
            self.last_action = self.choose_next_action()
            # Linearly anneal epsilon once training has started.
            if self.probability_of_random_action > self.final_random_action_prob \
                    and len(self.observations) > self.observation_steps:
                self.probability_of_random_action -= \
                    (self.initial_random_action_prob - self.final_random_action_prob) / self.explore_steps
        print (train_episodes_finished, "training episodes played.")
        print ("Training results:")
        train_rewards = np.array(train_rewards)
        train_end = time()
        train_time = train_end - train_start
        mean_loss = np.mean(train_loss)
        # NOTE(review): mean/std/max/min of an empty array raises if no
        # episode finished during this epoch — confirm intended.
        print ("mean:", train_rewards.mean(), "std:", train_rewards.std(), "max:", train_rewards.max(), "min:", train_rewards.min(), "epsilon:", self.probability_of_random_action)
        print ("t:", str(round(train_time, 2)) + "s")
        train_rewards = []
    # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
    game.close()
    self.last_state = None