Example #1
0
class Player:
    """HFO attacking player driving a tabular Q-learning agent.

    Owns the server connection (HFOAttackingPlayer), a discrete feature
    extractor, a discrete action set and the QAgent, and provides the
    train/test episode loops.
    """

    def __init__(self, num_opponents: int, num_teammates: int):
        # Game Interface: connects to the HFO server immediately on creation.
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = DiscreteFeatures1Teammate(num_op=num_opponents,
                                                  num_team=num_teammates)
        # Actions Interface:
        self.actions = DiscreteActionsModule()
        # Agent instance: epsilon starts fully exploratory (1); presumably it
        # decays towards final_epsilon inside QAgent — confirm against QAgent.
        self.agent = QAgent(num_features=self.features.num_features,
                            num_actions=self.actions.get_num_actions(),
                            learning_rate=0.1,
                            discount_factor=0.9,
                            epsilon=1,
                            final_epsilon=0.3)

    def get_reward(self, status: int) -> int:
        """Map an HFO game-status code to a scalar reward (delegates to basic_reward)."""
        return basic_reward(status)

    def set_starting_game_conditions(self,
                                     game_interface: HFOAttackingPlayer,
                                     features: DiscreteFeatures1Teammate,
                                     start_with_ball: bool = True,
                                     start_pos: tuple = None):
        """
        Set starting game conditions. Move for initial position, for example.

        @param game_interface: interface used to talk to the HFO server
        @param features: feature extractor passed through to movement actions
        @param start_with_ball: if True, dribble to the start position;
            otherwise kick the ball away first (if held) and move without it
        @param start_pos: (x, y) start position; when falsy, a random entry
            from STARTING_POSITIONS is chosen instead
        """
        if not start_pos:
            # No explicit position: pick one at random from the catalogue.
            pos_name, start_pos = random.choice(
                list(STARTING_POSITIONS.items()))
        if start_with_ball:
            # Move to starting position:
            self.actions.dribble_to_pos(start_pos, features, game_interface)
        else:
            if self.features.has_ball():
                # Get rid of the ball (kick towards the origin) before moving.
                self.actions.kick_to_pos((0, 0), features, game_interface)
            # Move to starting position:
            self.actions.move_to_pos(start_pos, features, game_interface)
        # Informs the other players that it is ready to start:
        game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def train(self,
              num_train_episodes: int,
              num_total_train_ep: int,
              start_with_ball: bool = True):
        """
        Run num_train_episodes training episodes against the live server.

        @param num_train_episodes: number of episodes to train in this iteration
            (NOTE(review): a value of 0 raises ZeroDivisionError in the final
            summary print)
        @param num_total_train_ep: number total of episodes to train
            (forwarded to agent.restart — presumably used to schedule the
            epsilon decay; confirm against QAgent)
        @param start_with_ball: bool
        @raise ServerDownError
        @return: (QLearningAgentV5) the agent
        """
        # metrics variables:
        _num_wins = 0
        _sum_epsilons = 0
        for ep in range(num_train_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Go to origin position:
            self.set_starting_game_conditions(
                game_interface=self.game_interface,
                features=self.features,
                start_with_ball=start_with_ball)

            # Start learning loop
            goal = False  # bool flag
            while self.game_interface.in_game():
                # Update environment features; copy so the stored transition
                # is not mutated by later feature updates.
                features_array = self.features.get_features().copy()

                # Act:
                action_idx = self.agent.act(features_array)
                status = self.actions.execute_action(
                    action_idx=action_idx,
                    features=self.features,
                    game_interface=self.game_interface)

                # Every step we update replay memory and train main network
                done = not self.game_interface.in_game()
                goal = self.game_interface.scored_goal()
                self.agent.store_transition(
                    curr_st=features_array,
                    action_idx=action_idx,
                    reward=self.get_reward(status),
                    new_st=self.features.get_features(),
                    done=done)

            # Train once per episode on the accumulated transitions.
            self.agent.train(goal)
            # Update auxiliar variables:
            _sum_epsilons += self.agent.epsilon
            _num_wins += 1 if self.game_interface.scored_goal() else 0
            # Update Agent:
            self.agent.restart(num_total_train_ep)
            # Game Reset
            self.game_interface.reset()
        print("[TRAIN: Summary] WIN rate = {}; AVR epsilon = {}".format(
            _num_wins / num_train_episodes,
            _sum_epsilons / num_train_episodes))

    def test(self,
             num_episodes: int,
             start_with_ball: bool = True,
             training: bool = False) -> float:
        """
        Run greedy (exploit-only) evaluation episodes and report the win rate.

        @param num_episodes: number of episodes to run
            (NOTE(review): 0 raises ZeroDivisionError at the end)
        @param start_with_ball: flag
        @param training: flag; when False, action changes are printed
        @return: (float) the win rate
        """
        # Cycle deterministically through the catalogue of start positions.
        starting_pos_list = list(STARTING_POSITIONS.values())
        # metrics variables:
        _num_wins = 0
        for ep in range(num_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Set up gaming conditions:
            self.set_starting_game_conditions(
                game_interface=self.game_interface,
                features=self.features,
                start_pos=starting_pos_list[ep % len(starting_pos_list)],
                start_with_ball=start_with_ball)

            # Start learning loop
            prev_action_idx = None
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features().copy()

                # Act greedily (no exploration):
                action_idx = self.agent.exploit_actions(features_array)
                # Only log when the chosen action changes, to keep output short.
                if prev_action_idx != action_idx and not training:
                    print("ACTION:: {}".format(
                        self.actions.map_action_to_str(
                            action_idx, self.features.has_ball())))
                prev_action_idx = action_idx

                self.actions.execute_action(action_idx=action_idx,
                                            features=self.features,
                                            game_interface=self.game_interface)

            # Update auxiliar variables:
            _num_wins += 1 if self.game_interface.scored_goal() else 0
            # Game Reset
            self.game_interface.reset()
        avr_win_rate = _num_wins / num_episodes
        print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
        return avr_win_rate
Example #2
0
    # Features Interface:
    features = DiscFeatures1Teammate(num_op=NUM_OPPONENTS,
                                     num_team=NUM_TEAMMATES)
    # Actions Interface:
    actions = Actions()

    # Scripted baseline loop: dribble to a fixed spot, then shoot whenever
    # the agent holds the ball; no learning is involved here.
    for ep in range(num_games):
        
        # Update features:
        features.update_features(game_interface.get_state())
        # Set up gaming conditions: dribble to a fixed starting position.
        actions.dribble_to_pos((-0.5, -0.7), features, game_interface)
        
        # Start learning loop
        # NOTE(review): status is never reassigned inside the loop below, so
        # the `status == GOAL` check further down can only be true if
        # IN_GAME == GOAL; prev_action_idx is assigned but never used.
        status = IN_GAME
        prev_action_idx = None
        while game_interface.in_game():
            if features.has_ball():
                actions.shoot_ball(game_interface, features)
            else:
                actions.do_nothing(game_interface, features)

        # Update auxiliar variables:
        if game_interface.scored_goal() or status == GOAL:
            print("[GOAL]")
        else:
            print("[FAIL]")
        # Game Reset
        game_interface.reset()

Example #3
0
class Player:
    """HFO attacking player driving a DQN agent with PLASTIC-style features.

    Owns the (optionally offline) server connection, the feature extractor,
    the action set and the DQNAgent, and provides train/test loops plus an
    offline batch-training helper.
    """

    def __init__(self,
                 num_opponents: int,
                 num_teammates: int,
                 port: int = 6000,
                 online: bool = True):
        # Game Interface:
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates,
                                                 port=port)
        # online=False skips the server handshake (e.g. for offline training).
        if online:
            self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions()
        # Agent instance: epsilon decays multiplicatively (epsilon_decay) from
        # 1 towards final_epsilon; tau is presumably the target-network
        # soft-update rate — confirm against DQNAgent.
        self.agent = DQNAgent(num_features=self.features.num_features,
                              num_actions=self.actions.get_num_actions(),
                              learning_rate=0.005,
                              discount_factor=0.99,
                              epsilon=1,
                              final_epsilon=0.001,
                              epsilon_decay=0.99995,
                              tau=0.125)

    def get_reward(self, game_status: int) -> int:
        """Sparse reward: +1000 for a goal, -1000 for a lost episode, -1 per step."""
        if game_status == GOAL:
            return 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            return -1000
        else:
            # Small living penalty to encourage finishing quickly.
            return -1

    def set_starting_game_conditions(self,
                                     game_interface: HFOAttackingPlayer,
                                     features: PlasticFeatures,
                                     start_with_ball: bool = True,
                                     start_pos: tuple = None):
        """
        Set starting game conditions. Move for initial position, for example.

        @param game_interface: interface used to talk to the HFO server
        @param features: feature extractor passed through to movement actions
        @param start_with_ball: if True, dribble to the start position;
            otherwise kick the ball away first (if held) and move without it
        @param start_pos: (x, y) start position; when falsy, a random entry
            from STARTING_POSITIONS is chosen instead
        """
        if not start_pos:
            # No explicit position: pick one at random from the catalogue.
            pos_name, start_pos = random.choice(
                list(STARTING_POSITIONS.items()))
        if start_with_ball:
            # Move to starting position:
            self.actions.dribble_to_pos(start_pos, features, game_interface)
        else:
            if self.features.has_ball():
                # Get rid of the ball (kick towards the origin) before moving.
                self.actions.kick_to_pos((0, 0), features, game_interface)
            # Move to starting position:
            self.actions.move_to_pos(start_pos, features, game_interface)
        # Informs the other players that it is ready to start:
        game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def test(self, num_episodes: int, start_with_ball: bool = True) -> float:
        """
        Run greedy (exploit-only) evaluation episodes and report the win rate.

        If the server goes down mid-run, returns the partial win rate
        computed over the episodes attempted so far.

        @param num_episodes: number of episodes to run
        @param start_with_ball: flag
        @return: (float) the win rate, rounded to 2 decimals
        """
        # Cycle deterministically through the catalogue of start positions.
        starting_pos_list = list(STARTING_POSITIONS.values())
        # metrics variables:
        _num_wins = 0
        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError as e:
                # Bail out early with the partial win rate.
                print("!!SERVER DOWN!! TEST {}/{}".format(ep, num_episodes))
                avr_win_rate = round(_num_wins / (ep + 1), 2)
                print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
                return avr_win_rate
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Set up gaming conditions:
            self.set_starting_game_conditions(
                game_interface=self.game_interface,
                features=self.features,
                start_pos=starting_pos_list[ep % len(starting_pos_list)],
                start_with_ball=start_with_ball)
            print("\nNEW TEST [{}]".format(
                starting_pos_list[ep % len(starting_pos_list)]))
            # print("FEATURES: ", self.features.get_features())

            # Start learning loop
            status = IN_GAME
            prev_action_idx = None
            while self.game_interface.in_game():
                if self.features.has_ball():
                    # Update environment features:
                    features_array = self.features.get_features().copy()

                    # Act greedily (no exploration):
                    action_idx = self.agent.exploit_actions(features_array)
                    # Only log when the chosen action changes.
                    if prev_action_idx != action_idx:
                        print("ACTION:: {}".format(
                            self.actions.map_action_to_str(action_idx)))
                    prev_action_idx = action_idx
                    self.actions.execute_action(
                        action_idx=action_idx,
                        features=self.features,
                        game_interface=self.game_interface)
                else:
                    # Without the ball, fall back to the scripted move action;
                    # -1 is used as a sentinel "move" action for logging.
                    if prev_action_idx != -1:
                        print("ACTION:: MOVE!!")
                    prev_action_idx = -1
                    status = self.actions.no_ball_action(
                        features=self.features,
                        game_interface=self.game_interface)

            # Update auxiliar variables:
            if self.game_interface.scored_goal() or status == GOAL:
                print("[GOAL]")
                _num_wins += 1
            else:
                print("[FAIL]")
            # Game Reset
            self.game_interface.reset()
        avr_win_rate = round(_num_wins / num_episodes, 2)
        print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
        return avr_win_rate

    def train(self,
              num_train_episodes: int,
              num_total_train_ep: int,
              start_with_ball: bool = True):
        """
        Run num_train_episodes training episodes against the live server.

        @param num_train_episodes: number of episodes to train in this iteration
        @param num_total_train_ep: number total of episodes to train
            (forwarded to agent.restart — presumably schedules epsilon decay;
            confirm against DQNAgent)
        @param start_with_ball: bool
        @raise ServerDownError
        @return: (QLearningAgentV5) the agent
            (NOTE(review): actually returns the average epsilon, or None on
            early server-down return — docstring and return are inconsistent)
        """
        starting_pos_list = list(STARTING_POSITIONS.values())

        # metrics variables:
        _num_wins = 0
        _sum_epsilons = 0
        for ep in range(num_train_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError as e:
                print("!!SERVER DOWN!! TRAIN {}/{}".format(
                    ep, num_train_episodes))
                return
            # Update features:
            self.features.update_features(self.game_interface.get_state())

            # Go to origin position:
            self.set_starting_game_conditions(
                game_interface=self.game_interface,
                features=self.features,
                start_pos=starting_pos_list[ep % len(starting_pos_list)],
                start_with_ball=start_with_ball)

            # Start learning loop
            status = IN_GAME
            episode_buffer = list()
            while self.game_interface.in_game():
                # Has Ball: the agent only learns/acts while holding the ball.
                if self.features.has_ball():
                    # Update environment features; copy so the stored
                    # transition is not mutated by later feature updates.
                    features_array = self.features.get_features().copy()

                    # Act:
                    action_idx = self.agent.act(features_array)
                    status = self.actions.execute_action(
                        action_idx=action_idx,
                        features=self.features,
                        game_interface=self.game_interface)

                    # Every step we update replay memory and train main network
                    done = not self.game_interface.in_game()
                    # Store transition:
                    # (obs, action, reward, new obs, done?)
                    # NOTE(review): heterogeneous elements yield an
                    # object-dtype array; modern NumPy requires an explicit
                    # dtype=object for such ragged input — verify.
                    transition = np.array([
                        features_array, action_idx,
                        self.get_reward(status),
                        self.features.get_features(), done
                    ])
                    episode_buffer.append(transition)
                    # Train:
                    self.agent.train(terminal_state=done)
                # No ball:
                else:
                    status = self.actions.no_ball_action(
                        features=self.features,
                        game_interface=self.game_interface)

            if self.game_interface.scored_goal() or status == GOAL:
                _num_wins += 1
                reward = self.get_reward(GOAL)
            else:
                reward = self.get_reward(status)
            # Add episodes:
            self.agent.store_episode(episode_buffer, reward=reward)
            # Update auxiliar variables:
            _sum_epsilons += self.agent.epsilon
            # Update Agent:
            self.agent.restart(num_total_train_ep)
            # Game Reset
            self.game_interface.reset()
        avr_epsilon = round(_sum_epsilons / num_train_episodes, 3)
        print("[TRAIN: Summary] WIN rate = {}; AVR epsilon = {}".format(
            _num_wins / num_train_episodes, avr_epsilon))
        return avr_epsilon

    def train_offline(self, game_buffer: np.ndarray):
        """Train the agent from a recorded batch, then probe its greedy policy.

        Runs 5 passes over game_buffer; after each pass, queries the greedy
        action for 6 one-hot-style probe states (all -1 except a single 0).
        NOTE(review): the probe hard-codes 6 features — assumes
        self.features.num_features == 6; confirm.
        """
        for _ in range(5):
            # Copy so train_from_batch cannot mutate the caller's buffer.
            buffer = game_buffer.copy()
            self.agent.train_from_batch(buffer)
            print("MODEL TRAINED")
            aux = [-1] * 6
            features_base = np.array(aux)
            for idx in range(6):
                features_array = features_base.copy()
                features_array[idx] = 0
                print("[TEST] {}".format(features_array.tolist()))
                action_idx = self.agent.exploit_actions(features_array,
                                                        verbose=True)
                print("-> {}".format(
                    self.actions.map_action_to_str(action_idx)))
Example #4
0
class Player:
    """HFO attacking player driving a tabular Q-learning agent.

    Variant that buffers whole episodes (store_episode) before training,
    acting only while holding the ball and falling back to a scripted
    no-ball action otherwise.
    """

    def __init__(self, num_opponents: int, num_teammates: int,
                 port: int = 6000):
        # Game Interface: connects to the HFO server immediately on creation.
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates,
                                                 port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = DiscFeatures1Teammate(num_op=num_opponents,
                                              num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions()
        # Agent instance: fixed exploration rate here (no final_epsilon).
        self.agent = QAgent(num_features=self.features.num_features,
                            num_actions=self.actions.get_num_actions(),
                            learning_rate=0.1, discount_factor=0.9, epsilon=0.8)

    def get_reward(self, status: int) -> int:
        """Map an HFO game-status code to a scalar reward (delegates to basic_reward)."""
        return basic_reward(status)

    def set_starting_game_conditions(self, game_interface: HFOAttackingPlayer,
                                     features: DiscFeatures1Teammate,
                                     start_with_ball: bool = True,
                                     start_pos: tuple = None):
        """
        Set starting game conditions. Move for initial position, for example.

        @param game_interface: interface used to talk to the HFO server
        @param features: feature extractor passed through to movement actions
        @param start_with_ball: if True, dribble to the start position;
            otherwise kick the ball away first (if held) and move without it
        @param start_pos: (x, y) start position; when falsy, a random entry
            from STARTING_POSITIONS is chosen instead
        """
        if not start_pos:
            # No explicit position: pick one at random from the catalogue.
            pos_name, start_pos = random.choice(
                list(STARTING_POSITIONS.items()))
        if start_with_ball:
            # Move to starting position:
            self.actions.dribble_to_pos(start_pos, features, game_interface)
        else:
            if self.features.has_ball():
                # Get rid of the ball (kick towards the origin) before moving.
                self.actions.kick_to_pos((0, 0), features, game_interface)
            # Move to starting position:
            self.actions.move_to_pos(start_pos, features, game_interface)
        # Informs the other players that it is ready to start:
        game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def test(self, num_episodes: int, start_with_ball: bool = True) -> float:
        """
        Run greedy (exploit-only) evaluation episodes and report the win rate.

        @param num_episodes: number of episodes to run
            (NOTE(review): 0 raises ZeroDivisionError at the end)
        @param start_with_ball: flag
        @return: (float) the win rate
        """
        # Cycle deterministically through the catalogue of start positions.
        starting_pos_list = list(STARTING_POSITIONS.values())
        # metrics variables:
        _num_wins = 0
        for ep in range(num_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Set up gaming conditions:
            self.set_starting_game_conditions(
                game_interface=self.game_interface, features=self.features,
                start_pos=starting_pos_list[ep % len(starting_pos_list)],
                start_with_ball=start_with_ball)
            print("\nNEW TEST [{}]".format(
                starting_pos_list[ep % len(starting_pos_list)]))

            # Start learning loop
            status = IN_GAME
            prev_action_idx = None
            while self.game_interface.in_game():
                if self.features.has_ball():
                    # Update environment features:
                    features_array = self.features.get_features().copy()

                    # Act greedily (no exploration):
                    action_idx = self.agent.exploit_actions(features_array)
                    # Only log when the chosen action changes.
                    if prev_action_idx != action_idx:
                        print("ACTION:: {}".format(
                            self.actions.map_action_to_str(action_idx)))
                    prev_action_idx = action_idx
                    self.actions.execute_action(
                        action_idx=action_idx,
                        features=self.features,
                        game_interface=self.game_interface)
                else:
                    # Without the ball, fall back to the scripted move action;
                    # -1 is used as a sentinel "move" action for logging.
                    if prev_action_idx != -1:
                        print("ACTION:: MOVE!!")
                    prev_action_idx = -1
                    status = self.actions.no_ball_action(
                        features=self.features,
                        game_interface=self.game_interface)

            # Update auxiliar variables:
            if self.game_interface.scored_goal() or status == GOAL:
                print("[GOAL]")
                _num_wins += 1
            else:
                print("[FAIL]")
            # Game Reset
            self.game_interface.reset()
        avr_win_rate = _num_wins / num_episodes
        print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
        return avr_win_rate

    def train(self, num_train_episodes: int, num_total_train_ep: int,
              start_with_ball: bool = True):
        """
        Run num_train_episodes training episodes against the live server.

        @param num_train_episodes: number of episodes to train in this iteration
            (NOTE(review): 0 raises ZeroDivisionError in the summary print)
        @param num_total_train_ep: number total of episodes to train
            (forwarded to agent.restart — presumably schedules epsilon decay;
            confirm against QAgent)
        @param start_with_ball: bool
        @raise ServerDownError
        @return: (QLearningAgentV5) the agent
            (NOTE(review): nothing is actually returned — docstring is stale)
        """
        # metrics variables:
        _num_wins = 0
        _sum_epsilons = 0
        for ep in range(num_train_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Go to origin position:
            self.set_starting_game_conditions(
                game_interface=self.game_interface, features=self.features,
                start_with_ball=start_with_ball)

            # Start learning loop
            status = IN_GAME
            episode_buffer = list()
            while self.game_interface.in_game():
                # Has Ball: the agent only learns/acts while holding the ball.
                if self.features.has_ball():
                    # Update environment features; copy so the stored
                    # transition is not mutated by later feature updates.
                    features_array = self.features.get_features().copy()

                    # Act:
                    action_idx = self.agent.act(features_array)
                    status = self.actions.execute_action(
                        action_idx=action_idx,
                        features=self.features,
                        game_interface=self.game_interface)

                    # Every step we update replay memory and train main network
                    done = not self.game_interface.in_game()
                    # Store transition:
                    # (obs, action, reward, new obs, done?)
                    # NOTE(review): heterogeneous elements yield an
                    # object-dtype array; modern NumPy requires an explicit
                    # dtype=object for such ragged input — verify.
                    transition = np.array(
                        [features_array, action_idx, self.get_reward(status),
                         self.features.get_features(), done])
                    episode_buffer.append(transition)
                # No ball:
                else:
                    status = self.actions.no_ball_action(
                        features=self.features,
                        game_interface=self.game_interface)
            if self.game_interface.scored_goal() or status == GOAL:
                _num_wins += 1
                reward = self.get_reward(GOAL)
            else:
                reward = self.get_reward(status)
            # Hand the whole buffered episode to the agent, then train once.
            self.agent.store_episode(episode_buffer, reward=reward)
            # Train:
            self.agent.train(terminal_state=True)
            # Update auxiliar variables:
            _sum_epsilons += self.agent.epsilon
            # Update Agent:
            self.agent.restart(num_total_train_ep)
            # Game Reset
            self.game_interface.reset()
        print("[TRAIN: Summary] WIN rate = {}; AVR epsilon = {}".format(
            _num_wins / num_train_episodes, _sum_epsilons / num_train_episodes))