Code example #1
0
 def __init__(self,
              num_opponents: int,
              num_teammates: int,
              model_file: str,
              epsilon: int = 1,
              port: int = 6000):
     """Connect to the HFO server and build the agent's components.

     Args:
         num_opponents: number of opponent players in the game.
         num_teammates: number of teammate players in the game.
         model_file: path to the saved DQN model to load.
         epsilon: exploration rate; default 1. NOTE(review): presumably a
             probability in [0, 1], so `float` may be the intended type —
             TODO confirm against callers.
         port: HFO server port (default 6000).
     """
     # Game Interface: attacking-player wrapper around the HFO connection.
     self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                              num_teammates=num_teammates,
                                              port=port)
     # Opens the connection to the (already running) HFO server.
     self.game_interface.connect_to_server()
     # Features Interface: sized for this team/opponent configuration.
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface: executes actions via the game interface, reading
     # state from the shared features object.
     self.actions = Actions(num_team=num_teammates,
                            features=self.features,
                            game_interface=self.game_interface)
     # Agent instance: exploration rate and pre-trained Q-network.
     self.epsilon = epsilon
     self.dqn = DQN.load(load_file=model_file)
Code example #2
0
File: train_model.py  Project: pedMatias/matias_hfo
    def __init__(self, num_opponents: int, num_teammates: int,
                 model_file: str):
        """Build the game/feature/action interfaces, load the DQN, and
        allocate an empty replay buffer.

        Args:
            num_opponents: number of opponent players in the game.
            num_teammates: number of teammate players in the game.
            model_file: path to the saved DQN model to load.
        """
        # Game Interface. NOTE(review): unlike the other variants in this
        # project, no connect_to_server() call here — presumably the caller
        # connects later; verify before use.
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates)
        # Features Interface: sized for this team/opponent configuration.
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface: executes actions via the game interface,
        # reading state from the shared features object.
        self.actions = Actions(num_team=num_teammates,
                               features=self.features,
                               game_interface=self.game_interface)
        # DQNs: pre-trained Q-network loaded from disk.
        self.dqn = DQN.load(load_file=model_file)

        # Replay buffer: fixed-capacity FIFO; old transitions are evicted
        # once REPLAY_MEMORY_SIZE is reached.
        self.replay_buffer = deque(maxlen=REPLAY_MEMORY_SIZE)
Code example #3
0
class Player:
    """Greedy (exploit-only in play) DQN attacking player for HFO.

    Wires together the game interface, feature extractor and action
    executor, loads a pre-trained DQN, and plays full games while
    collecting aggregate metrics.
    """

    def __init__(self,
                 num_opponents: int,
                 num_teammates: int,
                 model_file: str,
                 epsilon: int = 1,
                 port: int = 6000):
        """Connect to the HFO server and build the agent's components."""
        # Game Interface (connects immediately):
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates,
                                                 port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions(num_team=num_teammates,
                               features=self.features,
                               game_interface=self.game_interface)
        # Agent state:
        self.epsilon = epsilon
        self.dqn = DQN.load(load_file=model_file)
        # Metrics accumulator:
        self.metrics = GameMetrics()

    def set_starting_game_conditions(self,
                                     start_pos: tuple = None,
                                     starts_fixed_position: bool = True,
                                     verbose: bool = False):
        """
        Set starting game conditions. Move for initial position, for example
        """
        if starts_fixed_position:
            # No explicit position given: pick a random corner around the ball.
            if not start_pos:
                ball_pos: list = list(self.features.get_ball_coord())
                corners = get_vertices_around_ball(ball_pos)
                start_pos = random.choice(corners)
            self.actions.move_to_pos(start_pos)
            if verbose:
                print(f"[PLAYER: GAME SET UP] Initial pos= {start_pos}")
        elif verbose:
            # Start in current position
            print(f"[START GAME] Initial pos= RANDOM")
        # Informs the other players that it is ready to start:
        self.game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def exploit_actions(self, state: np.ndarray, verbose: bool = False) -> int:
        """Return the greedy action index, breaking Q-value ties at random."""
        q_values = self.dqn.predict(state)
        ties = np.where(q_values == q_values.max())[0]
        if len(ties) > 1:
            chosen = np.random.choice(ties)
        else:
            chosen = np.argmax(q_values)
        if verbose:
            print("Q values {} -> {}".format(q_values, int(chosen)))
        return int(chosen)

    def play(self, num_episodes: int, starts_fixed_position: bool = True):
        """
        @param num_episodes: number of episodes to train in this iteration
        @param starts_fixed_position: bool
        @raise ServerDownError
        @return: Game Metrics
        """
        self.metrics.restart()

        for ep in range(num_episodes):
            # Bail out early (returning partial metrics) if the server died:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return self.metrics.export_to_dict(num_episodes)

            # Refresh features from the latest observation:
            self.features.update_features(
                observation=self.game_interface.get_observation())

            # Move to the starting position:
            self.set_starting_game_conditions(
                starts_fixed_position=starts_fixed_position)

            # Per-episode flags (sticky for the whole episode):
            touched_ball = False
            passed_ball = False
            while self.game_interface.in_game():
                if self.features.has_ball():
                    touched_ball = True

                # Current state vector:
                state = self.features.get_features()

                # Greedy action:
                action = self.exploit_actions(state, verbose=False)
                status, correct_action, passed_ball_succ = \
                    self.actions.execute_action(action)

                # Per-step metrics:
                if passed_ball_succ is True:
                    passed_ball = True
                if correct_action:
                    self.metrics.inc_num_correct_actions()
                else:
                    self.metrics.inc_num_wrong_actions()

            # Episode-level metrics:
            self.metrics.inc_num_ep()
            if self.game_interface.scored_goal():
                self.metrics.inc_num_wins()
            if touched_ball:
                self.metrics.inc_num_games_touched_ball()
            if passed_ball:
                self.metrics.inc_num_games_passed_ball()

            # Game Reset
            self.game_interface.reset()

        return self.metrics.export_to_dict(num_episodes)
Code example #4
0
class Player:
    """Epsilon-greedy DQN attacking player used to collect experience.

    Plays HFO episodes with an epsilon-greedy policy over a pre-trained
    DQN, storing (obs, act, reward, new_obs, done) transitions and
    per-episode metrics.
    """

    def __init__(self,
                 num_opponents: int,
                 num_teammates: int,
                 model_file: str,
                 epsilon: float = 1,
                 port: int = 6000):
        """Connect to the HFO server and build the agent's components.

        Args:
            num_opponents: number of opponent players in the game.
            num_teammates: number of teammate players in the game.
            model_file: path to the saved DQN model to load.
            epsilon: exploration probability in [0, 1] (1 = always explore);
                annotation fixed from `int` to `float` — `act` compares it
                against `np.random.random()`.
            port: HFO server port (default 6000).
        """
        # Game Interface (connects immediately):
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates,
                                                 port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions(num_team=num_teammates,
                               features=self.features,
                               game_interface=self.game_interface)
        # Agent instance:
        self.epsilon = epsilon
        self.dqn = DQN.load(load_file=model_file)

    def exploit_actions(self, state: np.ndarray, verbose: bool = False) -> int:
        """Return the greedy action for `state`, breaking Q-value ties at random."""
        q_predict = self.dqn.predict(state)
        max_list = np.where(q_predict == q_predict.max())
        action = np.random.choice(max_list[0]) if len(max_list[0]) > 1 \
            else np.argmax(q_predict)
        if verbose:
            print("Q values {} -> {}".format(q_predict, int(action)))
        return int(action)

    def explore_actions(self):
        """Return a uniformly random action index in [0, num_actions)."""
        random_action = np.random.randint(0, self.actions.num_actions)
        return random_action

    def act(self, state: np.ndarray, verbose: bool = False):
        """Epsilon-greedy selection: explore with prob. epsilon, else exploit."""
        if np.random.random() < self.epsilon:  # Explore
            if verbose: print("[ACT] Explored")
            return self.explore_actions()
        else:  # Exploit
            if verbose: print("[ACT] Exploit")
            return self.exploit_actions(state)

    def get_reward(self, game_status: int, correct_action: bool) -> int:
        """Shaped reward: +/-1000 on terminal outcomes, else +1 / -5 per step."""
        reward = 0
        if game_status == GOAL:
            reward += 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            reward -= 1000
        else:
            if correct_action:
                reward += 1
            else:
                reward -= 5
        return reward

    def play_episode(self):
        """Play one episode; return (transition buffer, EpisodeMetrics).

        FIX: `touched_ball` / `passed_ball` were previously reassigned with
        `True if ... else False` on every iteration, so the flags only
        reflected the LAST step of the episode. They are now sticky once
        set, matching the accumulation used elsewhere in this project and
        the "touched/passed at any point in the game" metric semantics.
        """
        # auxiliar structures:
        episode_buffer = list()
        # metrics:
        touched_ball = False
        passed_ball = False
        num_wrong_actions = 0
        num_correct_actions = 0
        while self.game_interface.in_game():
            # Sticky flag: once the agent touches the ball, it stays True.
            if self.features.has_ball():
                touched_ball = True
            # Update environment features:
            features_array = self.features.get_features()
            # Act:
            act = self.act(features_array, verbose=False)
            status, correct_action, passed_ball_succ = \
                self.actions.execute_action(act)

            # Store transition:
            # (obs, action, reward, new obs, done?)
            transition = Transition(obs=features_array,
                                    act=act,
                                    reward=self.get_reward(
                                        status, correct_action),
                                    new_obs=self.features.get_features(),
                                    done=not self.game_interface.in_game(),
                                    correct_action=correct_action)
            episode_buffer.append(transition)

            # Metrics (sticky flag, same fix as touched_ball):
            if passed_ball_succ:
                passed_ball = True
            if correct_action:
                num_correct_actions += 1
            else:
                num_wrong_actions += 1
        metrics = EpisodeMetrics(touched_ball=touched_ball,
                                 passed_ball=passed_ball,
                                 num_wrong_actions=num_wrong_actions,
                                 num_correct_actions=num_correct_actions)
        return episode_buffer, metrics

    def play(self, num_episodes: int, starts_fixed_position: bool = True):
        """
        @param num_episodes: number of episodes to train in this iteration
        @param starts_fixed_position: bool (NOTE: currently unused here;
            kept for interface parity with the other Player variants)
        @raise ServerDownError
        @return: Game Metrics
        """
        experience_buffer = LearnBuffer()
        game_metrics = GameMetrics()

        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return experience_buffer, game_metrics.export_to_dict()

            # Update features:
            self.features.update_features(
                observation=self.game_interface.get_observation())

            # Play episode:
            ep_buffer, ep_metrics = self.play_episode()
            # Save episode:
            experience_buffer.save_episode(ep_buffer, verbose=True)

            # Update auxiliar variables:
            game_metrics.add_episode_metrics(
                ep_metrics, goal=self.game_interface.scored_goal())

            # Game Reset
            self.game_interface.reset()

        return experience_buffer, game_metrics.export_to_dict()