def _get_game_variables(self, state_variables):
    """Build the info dictionary describing the current game state.

    The result always contains ``"LEVEL"`` (taken from ``self.level``).
    When ``state_variables`` is ``None`` nothing else is added.  Otherwise
    the first 22 entries of ``state_variables`` are read by position and
    stored under the names listed below, and the player position is read
    from the game's USER1/USER2 variables and passed through
    ``doom_fixed_to_double``.

    Raises:
        IndexError: if ``state_variables`` has fewer than 22 entries.
    """
    info = {"LEVEL": self.level}
    if state_variables is None:
        return info

    # Position in this tuple == index into ``state_variables``.
    ordered_keys = (
        'KILLCOUNT',
        'ITEMCOUNT',
        'SECRETCOUNT',
        'FRAGCOUNT',
        'HEALTH',
        'ARMOR',
        'DEAD',
        'ON_GROUND',
        'ATTACK_READY',
        'ALTATTACK_READY',
        'SELECTED_WEAPON',
        'SELECTED_WEAPON_AMMO',
        'AMMO1',
        'AMMO2',
        'AMMO3',
        'AMMO4',
        'AMMO5',
        'AMMO6',
        'AMMO7',
        'AMMO8',
        'AMMO9',
        'AMMO0',
    )
    for slot, key in enumerate(ordered_keys):
        info[key] = state_variables[slot]

    # The position is not part of the state vector; it is queried from the
    # game through the USER1/USER2 variables.
    raw_x = self.game.get_game_variable(GameVariable.USER1)
    info['POSITION_X'] = doom_fixed_to_double(raw_x)
    raw_y = self.game.get_game_variable(GameVariable.USER2)
    info['POSITION_Y'] = doom_fixed_to_double(raw_y)
    return info
def step(self, action):
    """Advance the game by one (frame-repeated) action.

    Args:
        action: index into ``self.actions`` selecting the button vector
            that is sent to the game for ``self.frame_repeat`` tics.

    Returns:
        A ``(ob, reward, done, info)`` tuple.  ``reward`` is already
        multiplied by ``self.reward_scale``.  ``info`` is empty except on
        the terminal step, where it is
        ``{'episode': {'r': <total scaled reward>, 'l': n}}`` with ``n``
        taken from ``self.last_input``.
    """
    reward = self.game.make_action(self.actions[action], self.frame_repeat)

    if self.reward_reshape:
        # USER1 is read as a fixed-point running total of the shaping
        # reward; the per-step reward is its change since the last step.
        fixed_shaping_reward = self.game.get_game_variable(
            vzd.GameVariable.USER1)
        shaping_reward = vzd.doom_fixed_to_double(fixed_shaping_reward)
        shaping_reward = shaping_reward - self.last_total_shaping_reward
        self.last_total_shaping_reward += shaping_reward
        reward = shaping_reward

    done = self.game.is_episode_finished()
    if done:
        # No fresh input is requested once the episode is over; the stored
        # ``self.last_input`` is returned instead.
        ob, n = self.last_input
    else:
        ob, n = self.get_current_input()

    reward = reward * self.reward_scale
    self.total_reward += reward

    # Build the episode summary only after the final reward has been added,
    # otherwise the reported return would miss the last step's reward.
    info = {'episode': {'r': self.total_reward, 'l': n}} if done else {}
    return ob, reward, done, info
def reset(self):
    """Begin a new episode and return its first observation.

    In multi-maze mode without a fixed scenario, the game is rebuilt with a
    randomly chosen maze index on roughly one call in ten.  The index range
    is ``params.num_mazes_train`` or ``params.num_mazes_test`` depending on
    ``self.is_train``.  After the episode starts, the cached player pose,
    health and shaping reward are refreshed from the game.
    """
    # Outside the efficient (but memory consuming) fixed-scenario mode the
    # scenario is swapped with a 1/10 probability.
    swap_maze = (self.params.multimaze and not self.fixed_scenario
                 and random.randrange(0, 10) == 0)
    if swap_maze:
        maze_count = (self.params.num_mazes_train if self.is_train
                      else self.params.num_mazes_test)
        idx = random.randrange(0, maze_count)
        self.game = self._create_game(self.params, idx, self.is_train)

    game = self.game
    game.new_episode()

    pos_x = game.get_game_variable(GameVariable.POSITION_X)
    pos_y = game.get_game_variable(GameVariable.POSITION_Y)
    heading = math.radians(game.get_game_variable(GameVariable.ANGLE))
    self.player_info = self.PlayerInfo(pos_x, pos_y, heading)

    # Health is only cached when the scenario exposes it.
    if GameVariable.HEALTH in game.get_available_game_variables():
        self.previous_health = game.get_game_variable(GameVariable.HEALTH)

    if self.use_shaping:
        raw_shaping = game.get_game_variable(GameVariable.USER1)
        self.shaping_reward = doom_fixed_to_double(raw_shaping)

    return self.get_observation()
def reset(self, can_gen_rand=True):
    """Begin a new episode and return its first observation.

    Args:
        can_gen_rand: when false, the current game is always reused and no
            new maze is created.

    A new maze is only considered in multi-maze mode without a fixed
    scenario.  During training it is loaded with a 1/10 chance and only
    when more than 16 training mazes exist.  During testing it is loaded on
    every call as long as more than one test maze exists.
    """
    may_swap = (not self.fixed_scenario and can_gen_rand
                and self.params.multimaze)
    if may_swap:
        if self.is_train:
            # 1/10 chance to load a new map
            if (self.params.num_mazes_train > 16
                    and random.randrange(0, 10) == 0):
                idx = random.randrange(0, self.params.num_mazes_train)
                print('Creating new train maze with idx={}'.format(idx))
                self.game = self._create_game(self.params, idx,
                                              self.is_train)
        elif self.params.num_mazes_test > 1:
            # Required during testing, otherwise the result is biased
            # toward easier mazes.
            idx = random.randrange(0, self.params.num_mazes_test)
            print('Creating new test maze with idx={}'.format(idx))
            self.game = self._create_game(self.params, idx, self.is_train)

    game = self.game
    game.new_episode()

    pos_x = game.get_game_variable(GameVariable.POSITION_X)
    pos_y = game.get_game_variable(GameVariable.POSITION_Y)
    heading = math.radians(game.get_game_variable(GameVariable.ANGLE))
    self.player_info = self.PlayerInfo(
        pos_x, pos_y, heading, fixed_origin=self.params.fixed_origin)

    # Health is only cached when the scenario exposes it.
    if GameVariable.HEALTH in game.get_available_game_variables():
        self.previous_health = game.get_game_variable(GameVariable.HEALTH)

    if self.use_shaping:
        raw_shaping = game.get_game_variable(GameVariable.USER1)
        self.shaping_reward = doom_fixed_to_double(raw_shaping)

    return self.get_observation()
def goal(self):
    """Return the USER1, USER2 and USER3 game variables as a 3-tuple.

    Each raw value is passed through ``vizdoom.doom_fixed_to_double``
    before being returned.
    """
    # Read all three raw values first, then convert them.
    raw_values = [
        self.game.get_game_variable(variable)
        for variable in (vizdoom.USER1, vizdoom.USER2, vizdoom.USER3)
    ]
    return tuple(map(vizdoom.doom_fixed_to_double, raw_values))
def make_action(self, action):
    """Send ``action`` to the game and return the resulting reward.

    The action index is translated through ``self.action_map`` and held for
    ``self.frame_skip`` tics.  Outside training the raw game reward is
    returned.  During training exactly one extra term is added: the change
    in the USER1 shaping variable when ``self.use_shaping`` is set, or the
    result of ``self._check_health()`` otherwise.
    """
    reward = self.game.make_action(self.action_map[action], self.frame_skip)
    if not self.is_train:
        return reward

    if self.use_shaping:
        # USER1 is read as a running total, so only the change since the
        # previous call is added to the reward.
        shaping_total = doom_fixed_to_double(
            self.game.get_game_variable(GameVariable.USER1))
        delta = shaping_total - self.shaping_reward
        reward += delta
        self.shaping_reward += delta
    else:
        # Health-based bonus, used to encourage collecting health packs.
        reward += self._check_health()
    return reward
def make_action(self, action):
    """Send ``action`` to the game and return the per-frame reward.

    The action index is translated through ``self.action_map`` and held for
    ``self.frame_skip`` tics.  During training one extra term is added: the
    change in the USER1 shaping variable when ``self.use_shaping`` is set,
    or the result of ``self._check_health()`` otherwise.  The sum is then
    divided by ``self.frame_skip``, unless ``self.no_reward_average`` is
    set, in which case it is divided by ``1.0``.
    """
    reward = self.game.make_action(self.action_map[action], self.frame_skip)

    if self.is_train:
        if self.use_shaping:
            # USER1 is read as a running total, so only the change since
            # the previous call is added to the reward.
            shaping_total = doom_fixed_to_double(
                self.game.get_game_variable(GameVariable.USER1))
            delta = shaping_total - self.shaping_reward
            reward += delta
            self.shaping_reward += delta
        else:
            # Health comparison, used before shaping was available.
            reward += self._check_health()

    divisor = 1.0 if self.no_reward_average else self.frame_skip
    return reward / divisor
game.new_episode()

# Shaping reward accumulated so far; the game only exposes the running
# total, so this is needed to recover the per-step amount.
last_total_shaping_reward = 0

while not game.is_episode_finished():
    # Fetch the state before acting so it can be reported below.
    state = game.get_state()

    # Take a random action and keep the reward it produced.
    reward = game.make_action(choice(actions))

    # USER1 is a scripted variable stored in DoomFixed format; convert it
    # to a double and subtract the previous total to get this step's share.
    total_shaping_reward = vzd.doom_fixed_to_double(
        game.get_game_variable(vzd.GameVariable.USER1))
    shaping_reward = total_shaping_reward - last_total_shaping_reward
    last_total_shaping_reward += shaping_reward

    print("State #", state.number, sep="")
    print("Health: ", state.game_variables[0])
    print("Last Reward:", reward)
    print("Last Shaping Reward:", shaping_reward)
    print("=====================")

    # Slow things down, otherwise processing is too fast to watch.
    if sleep_time > 0:
        sleep(sleep_time)

print("Episode finished!")
print("Total reward:", game.get_total_reward())
def _create_game(self, params, idx, is_train, get_extra_info=False):
    """Create, configure and initialise a ``DoomGame`` for ``params.scenario``.

    Args:
        params: run configuration.  This method reads ``scenario``,
            ``scenario_dir``, ``test_scenario_dir``, ``multimaze``,
            ``screen_size``, ``show_window`` and ``disable_head_bob``.
        idx: maze index substituted into a multi-maze scenario template.
        is_train: when false and ``params.test_scenario_dir`` is set, the
            multi-maze config is loaded from the test directory; when false
            and it is not set, a warning is printed and the training
            directory is used.
        get_extra_info: when true, the labels, automap and depth buffers
            are enabled.

    Returns:
        The initialised game.

    Raises:
        AssertionError: if the scenario or screen size is not supported, or
            a multi-maze template is requested while ``params.multimaze``
            is falsy.

    Side effects:
        Sets ``self.previous_health`` when HEALTH is an available game
        variable, and ``self.shaping_reward`` when ``self.use_shaping``.
    """
    game = DoomGame()

    VALID_SCENARIOS = [
        'my_way_home.cfg',
        'health_gathering.cfg',
        'health_gathering_supreme.cfg',
        'health_gathering_supreme_no_death_penalty.cfg',
        'deadly_corridor.cfg',
        'defend_the_center.cfg',
        'defend_the_line.cfg',
        'two_color_maze014.cfg',
        'labyrinth_maze000.cfg',
        'labyrinth_maze11_000.cfg',
    ]
    # One template per line, each with its own trailing comma: adjacent
    # string literals are silently concatenated, which would merge two
    # templates into a single entry that matches neither.
    VALID_MULTI_SCENARIOS = [
        'maze_{:003}.cfg',
        'custom_scenario{:003}.cfg',
        'mino_maze{:003}.cfg',
        'labyrinth_maze{:003}.cfg',
        'two_item_maze{:003}.cfg',
        'six_item_maze{:003}.cfg',
        'four_item_maze{:003}.cfg',
        'eight_item_maze{:003}.cfg',
        'repeated_laby_maze{:003}.cfg',
        'two_color_maze{:003}.cfg',
    ]

    if params.scenario in VALID_SCENARIOS:
        game.load_config(params.scenario_dir + params.scenario)
    elif params.scenario in VALID_MULTI_SCENARIOS:
        assert params.multimaze
        if not is_train and params.test_scenario_dir:
            filename = params.test_scenario_dir + params.scenario.format(idx)
        else:
            if not is_train:
                print(
                    'WARNING, LOADING TRAINING DATA FOR TESTING, THIS MAY NOT BE WHAT YOU INTENDED!'
                )
            filename = params.scenario_dir + params.scenario.format(idx)
        game.load_config(filename)
    else:
        assert 0, 'Invalid environment {}'.format(params.scenario)

    if params.screen_size == '320X180':
        # TODO: Implement options for other resolutions
        game.set_screen_resolution(ScreenResolution.RES_320X180)
    else:
        assert 0, 'Invalid screen_size {}'.format(params.screen_size)

    game.set_sound_enabled(False)
    game.set_window_visible(params.show_window)
    if params.show_window:
        game.set_mode(Mode.SPECTATOR)
        game.add_game_args("+freelook 1")

    # Player variables for prediction of position etc.
    for variable in (
            GameVariable.POSITION_X,
            GameVariable.POSITION_Y,
            GameVariable.POSITION_Z,
            GameVariable.VELOCITY_X,
            GameVariable.VELOCITY_Y,
            GameVariable.VELOCITY_Z,
            GameVariable.ANGLE,
            GameVariable.PITCH,
            GameVariable.ROLL,
    ):
        game.add_available_game_variable(variable)

    if get_extra_info:
        game.set_labels_buffer_enabled(True)
        game.set_automap_buffer_enabled(True)
        game.set_automap_mode(AutomapMode.OBJECTS)
        game.set_automap_rotate(True)
        game.set_automap_render_textures(False)
        game.set_depth_buffer_enabled(True)

    game.init()

    # Seed the reward-shaping bookkeeping from the freshly started game.
    if GameVariable.HEALTH in game.get_available_game_variables():
        self.previous_health = game.get_game_variable(GameVariable.HEALTH)

    if self.use_shaping:
        self.shaping_reward = doom_fixed_to_double(
            game.get_game_variable(GameVariable.USER1))

    if params.disable_head_bob:
        game.send_game_command('movebob 0.0')

    return game
def _create_game(self, params, idx, is_train, get_extra_info=False):
    """Create, configure and initialise a ``DoomGame`` for ``params.scenario``.

    ``idx`` is stored on ``self.idx`` and substituted into multi-maze
    scenario templates.  Multi-maze configs are loaded from
    ``params.test_scenario_dir`` when ``is_train`` is false and that
    directory is set, otherwise from ``params.scenario_dir``.  The special
    scenario ``'curriculum'`` loads no config here.  ``get_extra_info``
    enables the labels, automap and depth buffers.

    Returns the initialised game.  Raises ``AssertionError`` for an unknown
    scenario or screen size.  Also sets ``self.previous_health`` (when
    HEALTH is available) and ``self.shaping_reward`` (when
    ``self.use_shaping``).
    """
    game = DoomGame()
    self.idx = idx

    game.set_window_visible(params.show_window)
    game.set_sound_enabled(False)
    game.add_game_args("+vid_forcesurface 1")

    single_configs = (
        'my_way_home.cfg',
        'health_gathering.cfg',
        'health_gathering_supreme.cfg',
        'health_gathering_supreme_no_death_penalty.cfg',
        'deadly_corridor.cfg',
        'defend_the_center.cfg',
        'defend_the_line.cfg',
        'custom_maze_001.cfg',
        'custom_maze_002.cfg',
        'custom_maze_003.cfg',
        'custom_mazes_005/train/maze_000.cfg',
        'custom_mazes_005/train/maze_004.cfg',
        'custom_mazes_005/valid/maze_000.cfg',
        'long_term_base.cfg',
        'scenario_x.cfg',
        'scenario_cw2.cfg',
        'scenario_2_item0.cfg',
        'scenario_2_item1.cfg',
        'scenario_2_item2.cfg',
        'scenario_2_item3.cfg',
        'scenario_3_item0.cfg',
        'two_color_maze040.cfg',
        'four_item_maze034.cfg',
        'labyrinth_maze000.cfg',
        'mino_maze000.cfg',
        'labyrinth_maze11_000.cfg',
        'mino_maze_simple.cfg',
    )
    templated_configs = (
        'maze_{:003}.cfg',
        'mino_maze{:003}.cfg',
        'labyrinth_maze{:003}.cfg',
        'indicator_maze{:003}.cfg',
        'two_item_maze{:003}.cfg',
        'six_item_maze{:003}.cfg',
        'four_item_maze{:003}.cfg',
        'eight_item_maze{:003}.cfg',
        'repeated_laby_maze{:003}.cfg',
        'two_color_maze{:003}.cfg',
    )

    scenario = params.scenario
    if scenario in single_configs:
        game.load_config(params.scenario_dir + scenario)
    elif scenario in templated_configs:
        assert params.multimaze
        use_test_dir = not is_train and params.test_scenario_dir
        base_dir = (params.test_scenario_dir if use_test_dir
                    else params.scenario_dir)
        game.load_config(base_dir + scenario.format(idx))
    elif scenario != 'curriculum':
        # 'curriculum' is accepted but loads no config in this method.
        assert False, 'Invalid environment {}'.format(params.scenario)

    if params.screen_size != '320X180':
        assert False, 'Invalid screen_size {}'.format(params.screen_size)
    game.set_screen_resolution(ScreenResolution.RES_320X180)

    wants_depth = (params.use_depth or params.predict_depth
                   or params.ego_model or params.depth_as_obs)
    if wants_depth:
        game.set_depth_buffer_enabled(True)

    # These two setters are issued a second time, now after the config has
    # been loaded; the order relative to load_config is kept as is.
    game.set_window_visible(params.show_window)
    game.set_sound_enabled(False)

    if params.show_window:
        game.set_mode(Mode.SPECTATOR)
        game.add_game_args("+freelook 1")

    # Player variables for prediction of position etc.
    for variable in (
            GameVariable.POSITION_X,
            GameVariable.POSITION_Y,
            GameVariable.POSITION_Z,
            GameVariable.VELOCITY_X,
            GameVariable.VELOCITY_Y,
            GameVariable.VELOCITY_Z,
            GameVariable.ANGLE,
            GameVariable.PITCH,
            GameVariable.ROLL,
    ):
        game.add_available_game_variable(variable)

    if get_extra_info:
        game.set_labels_buffer_enabled(True)
        game.set_automap_buffer_enabled(True)
        game.set_automap_mode(AutomapMode.OBJECTS)
        game.set_automap_rotate(True)
        game.set_automap_render_textures(False)
        game.set_depth_buffer_enabled(True)

    game.add_game_args("+vid_forcesurface 1")
    game.init()

    # Seed the reward-shaping bookkeeping from the freshly started game.
    if GameVariable.HEALTH in game.get_available_game_variables():
        self.previous_health = game.get_game_variable(GameVariable.HEALTH)

    if self.use_shaping:
        self.shaping_reward = doom_fixed_to_double(
            game.get_game_variable(GameVariable.USER1))

    if params.disable_head_bob:
        game.send_game_command('movebob 0.0')

    return game