Ejemplo n.º 1
0
 def _get_game_variables(self, state_variables):
     """Assemble the info dictionary for the current game state.

     Args:
         state_variables: Indexable collection of game variable values, or
             ``None``. When given, entries 0 through 21 are read.

     Returns:
         A dict that always contains ``"LEVEL"`` (taken from ``self.level``).
         When ``state_variables`` is not ``None`` it additionally holds one
         entry per key listed below plus ``POSITION_X`` / ``POSITION_Y``.
     """
     info = {"LEVEL": self.level}
     if state_variables is None:
         return info

     # Keys listed in the order their values are stored in state_variables.
     ordered_keys = (
         'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT',
         'HEALTH', 'ARMOR', 'DEAD', 'ON_GROUND',
         'ATTACK_READY', 'ALTATTACK_READY',
         'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO',
         'AMMO1', 'AMMO2', 'AMMO3', 'AMMO4', 'AMMO5',
         'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0',
     )
     # Indexed access (rather than zip) so a too-short input still raises.
     for slot, key in enumerate(ordered_keys):
         info[key] = state_variables[slot]

     # The position is queried from the game itself through USER1/USER2 and
     # converted with doom_fixed_to_double, not taken from state_variables.
     read_variable = self.game.get_game_variable
     info['POSITION_X'] = doom_fixed_to_double(read_variable(GameVariable.USER1))
     info['POSITION_Y'] = doom_fixed_to_double(read_variable(GameVariable.USER2))
     return info
Ejemplo n.º 2
0
    def step(self, action):
        """Perform one action and report the outcome.

        Args:
            action: Index into ``self.actions`` selecting the action to
                send to the game.

        Returns:
            A tuple ``(ob, reward, done, info)``. ``reward`` is already
            multiplied by ``self.reward_scale``. ``info`` is empty while the
            episode is running; on the final step it is
            ``{'episode': {'r': <total reward>, 'l': n}}`` where ``n`` is the
            second element of ``self.last_input``.
        """
        reward = self.game.make_action(self.actions[action], self.frame_repeat)
        if self.reward_reshape:
            # USER1 is read as a running shaping total in fixed-point form;
            # the step reward is its increase since the previous step and
            # replaces the reward returned by the game.
            total_shaping_reward = vzd.doom_fixed_to_double(
                self.game.get_game_variable(vzd.GameVariable.USER1))
            reward = total_shaping_reward - self.last_total_shaping_reward
            self.last_total_shaping_reward += reward

        done = self.game.is_episode_finished()
        if done:
            # No fresh input is fetched once the episode has finished; the
            # stored one is returned instead.
            ob, n = self.last_input
        else:
            ob, n = self.get_current_input()

        reward = reward * self.reward_scale
        self.total_reward += reward

        # Build the summary only after the final reward has been accumulated,
        # otherwise the reported episode return misses the last step.
        info = {'episode': {'r': self.total_reward, 'l': n}} if done else {}

        return ob, reward, done, info
    def reset(self):
        """Begin a new episode and return its first observation.

        In multi-maze mode without a fixed scenario, the game is replaced by
        a freshly created one for a randomly chosen maze with probability
        1/10. Afterwards the player pose, the previous health (when HEALTH is
        an available game variable) and the shaping reward baseline (when
        ``self.use_shaping`` is set) are refreshed from the new episode.

        Returns:
            The value of ``self.get_observation()``.
        """
        # Fixed scenario mode is efficient but memory consuming; outside of
        # it the scenario is swapped with a 1/10 probability.
        swap_maze = (self.params.multimaze and not self.fixed_scenario
                     and random.randrange(0, 10) == 0)
        if swap_maze:
            if self.is_train:
                maze_count = self.params.num_mazes_train
            else:
                maze_count = self.params.num_mazes_test
            idx = random.randrange(0, maze_count)
            self.game = self._create_game(self.params, idx, self.is_train)

        game = self.game
        game.new_episode()

        pos_x = game.get_game_variable(GameVariable.POSITION_X)
        pos_y = game.get_game_variable(GameVariable.POSITION_Y)
        heading = math.radians(game.get_game_variable(GameVariable.ANGLE))
        self.player_info = self.PlayerInfo(pos_x, pos_y, heading)

        if GameVariable.HEALTH in game.get_available_game_variables():
            self.previous_health = game.get_game_variable(GameVariable.HEALTH)

        if self.use_shaping:
            fixed_total = game.get_game_variable(GameVariable.USER1)
            self.shaping_reward = doom_fixed_to_double(fixed_total)

        return self.get_observation()
Ejemplo n.º 4
0
    def reset(self, can_gen_rand=True):
        """Begin a new episode and return its first observation.

        Args:
            can_gen_rand: When false, the current game is always kept and no
                new maze is loaded.

        Returns:
            The value of ``self.get_observation()``.
        """
        may_resample = (not self.fixed_scenario and can_gen_rand
                        and self.params.multimaze)
        if may_resample:
            if self.is_train:
                # Training: 1/10 chance to load a new map, and only when
                # more than 16 training mazes exist.
                if (self.params.num_mazes_train > 16
                        and random.randrange(0, 10) == 0):
                    idx = random.randrange(0, self.params.num_mazes_train)
                    print('Creating new train maze with idx={}'.format(idx))
                    self.game = self._create_game(self.params, idx,
                                                  self.is_train)
            elif self.params.num_mazes_test > 1:
                # Testing: always resample, otherwise the result is biased
                # toward easier mazes.
                idx = random.randrange(0, self.params.num_mazes_test)
                print('Creating new test maze with idx={}'.format(idx))
                self.game = self._create_game(self.params, idx, self.is_train)

        game = self.game
        game.new_episode()

        pos_x = game.get_game_variable(GameVariable.POSITION_X)
        pos_y = game.get_game_variable(GameVariable.POSITION_Y)
        heading = math.radians(game.get_game_variable(GameVariable.ANGLE))
        self.player_info = self.PlayerInfo(
            pos_x, pos_y, heading, fixed_origin=self.params.fixed_origin)

        if GameVariable.HEALTH in game.get_available_game_variables():
            self.previous_health = game.get_game_variable(GameVariable.HEALTH)

        if self.use_shaping:
            fixed_total = game.get_game_variable(GameVariable.USER1)
            self.shaping_reward = doom_fixed_to_double(fixed_total)

        return self.get_observation()
Ejemplo n.º 5
0
    def goal(self):
        """Return the USER1, USER2 and USER3 game variables as a 3-tuple.

        Each raw value is passed through ``vizdoom.doom_fixed_to_double``
        before being returned.
        """
        read_variable = self.game.get_game_variable
        raw_values = [
            read_variable(vizdoom.USER1),
            read_variable(vizdoom.USER2),
            read_variable(vizdoom.USER3),
        ]
        return tuple(map(vizdoom.doom_fixed_to_double, raw_values))
    def make_action(self, action):
        """Send an action to the game and return the resulting reward.

        The action is looked up in ``self.action_map`` and held for
        ``self.frame_skip`` frames. During training the game reward is
        augmented: with the result of ``self._check_health()`` when shaping
        is off, or with the change of the USER1 shaping value when it is on.

        Args:
            action: Key into ``self.action_map``.

        Returns:
            The (possibly augmented) reward.
        """
        reward = self.game.make_action(self.action_map[action],
                                       self.frame_skip)
        if not self.is_train:
            return reward

        if self.use_shaping:
            # ViZDoom offers a shaping reward in some scenarios; only the
            # change since the last recorded value counts for this step.
            shaping_total = doom_fixed_to_double(
                self.game.get_game_variable(GameVariable.USER1))
            delta = shaping_total - self.shaping_reward
            reward += delta
            self.shaping_reward += delta
        else:
            # Health based shaping encourages collection of health packs in
            # the health gathering scenarios.
            reward += self._check_health()

        return reward
Ejemplo n.º 7
0
    def make_action(self, action):
        """Send an action to the game and return the per-frame reward.

        The action is looked up in ``self.action_map`` and held for
        ``self.frame_skip`` frames. During training the game reward is
        augmented: with the result of ``self._check_health()`` when shaping
        is off, or with the change of the USER1 shaping value when it is on.

        Args:
            action: Key into ``self.action_map``.

        Returns:
            The (possibly augmented) reward divided by ``self.frame_skip``,
            or left undivided when ``self.no_reward_average`` is set.
        """
        reward = self.game.make_action(self.action_map[action],
                                       self.frame_skip)

        if self.is_train:
            if self.use_shaping:
                # Only the change of the shaping value since the last
                # recorded one counts for this step.
                shaping_total = doom_fixed_to_double(
                    self.game.get_game_variable(GameVariable.USER1))
                delta = shaping_total - self.shaping_reward
                reward += delta
                self.shaping_reward += delta
            else:
                # Health comparison, used when shaping is not enabled.
                reward += self._check_health()

        divisor = 1.0 if self.no_reward_average else self.frame_skip
        return reward / divisor
Ejemplo n.º 8
0
        # Start a fresh episode, then act randomly until it finishes,
        # printing the regular and the shaping reward of every step.
        game.new_episode()

        # Running total of the shaping reward seen so far; used to turn the
        # total read from the game into a per-step value.
        last_total_shaping_reward = 0

        while not game.is_episode_finished():

            # Get the state and possibly do something with it.
            state = game.get_state()

            # Make a random action and save the reward.
            reward = game.make_action(choice(actions))

            # Retrieve the shaping reward.
            fixed_shaping_reward = game.get_game_variable(vzd.GameVariable.USER1)  # Get value of scripted variable
            shaping_reward = vzd.doom_fixed_to_double(
                fixed_shaping_reward)  # The value is in DoomFixed format, convert it to double
            # Per-step shaping reward = new total minus the previous total.
            shaping_reward = shaping_reward - last_total_shaping_reward
            last_total_shaping_reward += shaping_reward

            print("State #" + str(state.number))
            # The first game variable is printed as health; presumably the
            # loaded config lists HEALTH first - TODO confirm.
            print("Health: ", state.game_variables[0])
            print("Last Reward:", reward)
            print("Last Shaping Reward:", shaping_reward)
            print("=====================")

            # Sleep some time because processing is too fast to watch.
            if sleep_time > 0:
                sleep(sleep_time)

        print("Episode finished!")
        print("Total reward:", game.get_total_reward())
    def _create_game(self, params, idx, is_train, get_extra_info=False):
        """Create, configure and initialise a ``DoomGame`` for a scenario.

        Args:
            params: Settings object. This method reads ``scenario``,
                ``scenario_dir``, ``test_scenario_dir``, ``multimaze``,
                ``screen_size``, ``show_window`` and ``disable_head_bob``.
            idx: Maze index substituted into a multi-maze scenario template.
            is_train: Whether the game is created for training. For testing,
                ``params.test_scenario_dir`` is used when it is set;
                otherwise a warning is printed and the training directory
                is used.
            get_extra_info: When true, the labels, automap and depth buffers
                are enabled as well.

        Returns:
            The initialised ``DoomGame`` instance.

        Raises:
            AssertionError: If the scenario is unknown, if a multi-maze
                scenario is requested without ``params.multimaze``, or if
                the screen size is unsupported.

        Side effects:
            Sets ``self.previous_health`` when HEALTH is an available game
            variable and ``self.shaping_reward`` when ``self.use_shaping``
            is set.
        """
        game = DoomGame()

        valid_scenarios = (
            'my_way_home.cfg', 'health_gathering.cfg',
            'health_gathering_supreme.cfg',
            'health_gathering_supreme_no_death_penalty.cfg',
            'deadly_corridor.cfg', 'defend_the_center.cfg',
            'defend_the_line.cfg', 'two_color_maze014.cfg',
            'labyrinth_maze000.cfg', 'labyrinth_maze11_000.cfg',
        )

        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated, which previously made
        # 'mino_maze{:003}.cfg' unavailable.
        valid_multi_scenarios = (
            'maze_{:003}.cfg', 'custom_scenario{:003}.cfg',
            'mino_maze{:003}.cfg', 'labyrinth_maze{:003}.cfg',
            'two_item_maze{:003}.cfg', 'six_item_maze{:003}.cfg',
            'four_item_maze{:003}.cfg', 'eight_item_maze{:003}.cfg',
            'repeated_laby_maze{:003}.cfg', 'two_color_maze{:003}.cfg',
        )

        # Validation raises explicitly (instead of using assert statements)
        # so that it is not stripped when Python runs with -O.
        if params.scenario in valid_scenarios:
            game.load_config(params.scenario_dir + params.scenario)
        elif params.scenario in valid_multi_scenarios:
            if not params.multimaze:
                raise AssertionError(
                    'Scenario {} requires params.multimaze'.format(
                        params.scenario))
            if not is_train and params.test_scenario_dir:
                scenario_dir = params.test_scenario_dir
            else:
                if not is_train:
                    print(
                        'WARNING, LOADING TRAINING DATA FOR TESTING, THIS MAY NOT BE WHAT YOU INTENDED!'
                    )
                scenario_dir = params.scenario_dir
            game.load_config(scenario_dir + params.scenario.format(idx))
        else:
            raise AssertionError(
                'Invalid environment {}'.format(params.scenario))

        if params.screen_size == '320X180':
            # TODO: Implement options for other resolutions
            game.set_screen_resolution(ScreenResolution.RES_320X180)
        else:
            raise AssertionError(
                'Invalid screen_size {}'.format(params.screen_size))

        game.set_sound_enabled(False)
        game.set_window_visible(params.show_window)

        if params.show_window:
            game.set_mode(Mode.SPECTATOR)
            game.add_game_args("+freelook 1")

        # Player variables for prediction of position etc
        for variable in (
                GameVariable.POSITION_X, GameVariable.POSITION_Y,
                GameVariable.POSITION_Z, GameVariable.VELOCITY_X,
                GameVariable.VELOCITY_Y, GameVariable.VELOCITY_Z,
                GameVariable.ANGLE, GameVariable.PITCH, GameVariable.ROLL):
            game.add_available_game_variable(variable)

        if get_extra_info:
            game.set_labels_buffer_enabled(True)
            game.set_automap_buffer_enabled(True)
            game.set_automap_mode(AutomapMode.OBJECTS)
            game.set_automap_rotate(True)
            game.set_automap_render_textures(False)
            game.set_depth_buffer_enabled(True)

        game.init()

        if GameVariable.HEALTH in game.get_available_game_variables():
            self.previous_health = game.get_game_variable(GameVariable.HEALTH)

        if self.use_shaping:
            self.shaping_reward = doom_fixed_to_double(
                game.get_game_variable(GameVariable.USER1))

        if params.disable_head_bob:
            game.send_game_command('movebob 0.0')

        return game
Ejemplo n.º 10
0
    def _create_game(self, params, idx, is_train, get_extra_info=False):
        """Create, configure and initialise a ``DoomGame`` for a scenario.

        Args:
            params: Settings object. This method reads ``scenario``,
                ``scenario_dir``, ``test_scenario_dir``, ``multimaze``,
                ``screen_size``, ``show_window``, ``disable_head_bob`` and
                the depth flags ``use_depth``, ``predict_depth``,
                ``ego_model`` and ``depth_as_obs``.
            idx: Maze index; stored on ``self.idx`` and substituted into a
                multi-maze scenario template.
            is_train: Whether the game is created for training. For testing,
                ``params.test_scenario_dir`` is used when it is set,
                otherwise the training directory is used.
            get_extra_info: When true, the labels, automap and depth buffers
                are enabled as well.

        Returns:
            The initialised ``DoomGame`` instance.

        Raises:
            AssertionError: If the scenario is unknown, if a multi-maze
                scenario is requested without ``params.multimaze``, or if
                the screen size is unsupported.

        Side effects:
            Sets ``self.idx``, ``self.previous_health`` (when HEALTH is an
            available game variable) and ``self.shaping_reward`` (when
            ``self.use_shaping`` is set).
        """
        game = DoomGame()
        self.idx = idx
        game.set_window_visible(params.show_window)
        game.set_sound_enabled(False)
        game.add_game_args("+vid_forcesurface 1")

        valid_scenarios = (
            'my_way_home.cfg', 'health_gathering.cfg',
            'health_gathering_supreme.cfg',
            'health_gathering_supreme_no_death_penalty.cfg',
            'deadly_corridor.cfg', 'defend_the_center.cfg',
            'defend_the_line.cfg', 'custom_maze_001.cfg',
            'custom_maze_002.cfg', 'custom_maze_003.cfg',
            'custom_mazes_005/train/maze_000.cfg',
            'custom_mazes_005/train/maze_004.cfg',
            'custom_mazes_005/valid/maze_000.cfg', 'long_term_base.cfg',
            'scenario_x.cfg', 'scenario_cw2.cfg', 'scenario_2_item0.cfg',
            'scenario_2_item1.cfg', 'scenario_2_item2.cfg',
            'scenario_2_item3.cfg', 'scenario_3_item0.cfg',
            'two_color_maze040.cfg', 'four_item_maze034.cfg',
            'labyrinth_maze000.cfg', 'mino_maze000.cfg',
            'labyrinth_maze11_000.cfg', 'mino_maze_simple.cfg',
        )

        valid_multi_scenarios = (
            'maze_{:003}.cfg', 'mino_maze{:003}.cfg',
            'labyrinth_maze{:003}.cfg', 'indicator_maze{:003}.cfg',
            'two_item_maze{:003}.cfg', 'six_item_maze{:003}.cfg',
            'four_item_maze{:003}.cfg', 'eight_item_maze{:003}.cfg',
            'repeated_laby_maze{:003}.cfg', 'two_color_maze{:003}.cfg',
        )

        # Validation raises explicitly (instead of using assert statements)
        # so that it is not stripped when Python runs with -O.
        if params.scenario in valid_scenarios:
            game.load_config(params.scenario_dir + params.scenario)
        elif params.scenario in valid_multi_scenarios:
            if not params.multimaze:
                raise AssertionError(
                    'Scenario {} requires params.multimaze'.format(
                        params.scenario))
            if not is_train and params.test_scenario_dir:
                scenario_dir = params.test_scenario_dir
            else:
                scenario_dir = params.scenario_dir
            game.load_config(scenario_dir + params.scenario.format(idx))
        elif params.scenario == 'curriculum':
            # NOTE(review): no config file is loaded for 'curriculum' here;
            # presumably it is set up elsewhere - TODO confirm.
            pass
        else:
            raise AssertionError(
                'Invalid environment {}'.format(params.scenario))

        if params.screen_size == '320X180':
            game.set_screen_resolution(ScreenResolution.RES_320X180)
        else:
            raise AssertionError(
                'Invalid screen_size {}'.format(params.screen_size))

        if (params.use_depth or params.predict_depth or params.ego_model
                or params.depth_as_obs):
            game.set_depth_buffer_enabled(True)

        # These settings are applied a second time after the config file has
        # been loaded. NOTE(review): presumably so that the config cannot
        # override them - TODO confirm before removing either call.
        game.set_window_visible(params.show_window)
        game.set_sound_enabled(False)
        if params.show_window:
            game.set_mode(Mode.SPECTATOR)
            game.add_game_args("+freelook 1")

        # Player variables for prediction of position etc
        for variable in (
                GameVariable.POSITION_X, GameVariable.POSITION_Y,
                GameVariable.POSITION_Z, GameVariable.VELOCITY_X,
                GameVariable.VELOCITY_Y, GameVariable.VELOCITY_Z,
                GameVariable.ANGLE, GameVariable.PITCH, GameVariable.ROLL):
            game.add_available_game_variable(variable)

        if get_extra_info:
            game.set_labels_buffer_enabled(True)
            game.set_automap_buffer_enabled(True)
            game.set_automap_mode(AutomapMode.OBJECTS)
            game.set_automap_rotate(True)
            game.set_automap_render_textures(False)
            game.set_depth_buffer_enabled(True)

        # Added again right before init; see the review note above.
        game.add_game_args("+vid_forcesurface 1")
        game.init()

        if GameVariable.HEALTH in game.get_available_game_variables():
            self.previous_health = game.get_game_variable(GameVariable.HEALTH)

        if self.use_shaping:
            self.shaping_reward = doom_fixed_to_double(
                game.get_game_variable(GameVariable.USER1))

        if params.disable_head_bob:
            game.send_game_command('movebob 0.0')

        return game