Example #1
def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteHighLevelFeatures, agent: QLearningAgent,
         actions: ActionManager):
    # Run training using Q-Learning
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)

            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_function(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=False, actions_name=actions_name)
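
Note: the test() above accumulates reward_function(status), which is defined elsewhere in the original script (Example #28 assigns reward_function = simple_reward). A minimal sketch of such a reward, assuming the hfo package's standard game-status constants; the exact shaping below is illustrative, not the project's confirmed definition:

from hfo import GOAL, CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME

def simple_reward(status: int) -> int:
    # Hypothetical shaping: +1 when the attacker scores, -1 when the episode
    # ends without a goal, 0 while the game is still running.
    if status == GOAL:
        return 1
    if status in (CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME):
        return -1
    return 0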
Example #2
 def __init__(self,
              num_opponents: int,
              num_teammates: int,
              port: int = 6000,
              online: bool = True):
     # Game Interface:
     self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                              num_teammates=num_teammates,
                                              port=port)
     if online:
         self.game_interface.connect_to_server()
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = Actions()
     # Agent instance:
     self.agent = DQNAgent(num_features=self.features.num_features,
                           num_actions=self.actions.get_num_actions(),
                           learning_rate=0.005,
                           discount_factor=0.99,
                           epsilon=1,
                           final_epsilon=0.001,
                           epsilon_decay=0.99995,
                           tau=0.125)
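
Note: the DQNAgent above starts with epsilon=1 and a decay rate of 0.99995 toward final_epsilon=0.001. Assuming the agent multiplies epsilon by epsilon_decay once per step (an assumption about its schedule), a quick check shows how long exploration lasts:

import math

# Number of decay steps until epsilon falls from 1.0 to the 0.001 floor,
# under a purely multiplicative decay.
steps_to_floor = math.log(0.001 / 1.0) / math.log(0.99995)
print(round(steps_to_floor))  # roughly 138,000 steps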
Example #3
    def move_agent(self, action_name, game_interface: HFOAttackingPlayer,
                   features: BaseHighLevelState):
        """ Agent Moves/Dribbles in a specific direction """

        # Get Movement type:
        action = DRIBBLE_TO

        if "UP" in action_name:
            action = (action, features.agent.x_pos, -0.9)
        elif "DOWN" in action_name:
            action = (action, features.agent.x_pos, 0.9)
        elif "LEFT" in action_name:
            action = (action, -0.8, features.agent.y_pos)
        elif "RIGHT" in action_name:
            action = (action, 0.8, features.agent.y_pos)
        else:
            raise ValueError("ACTION NAME is WRONG")

        attempts = 0
        while game_interface.in_game() and attempts < self.action_num_episodes:
            status, observation = game_interface.step(action,
                                                      features.has_ball())
            features.update_features(observation)
            attempts += 1
        return game_interface.get_game_status(), \
            game_interface.get_observation_array()
def move_agent(action_name, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    # print("move_agent!")
    if "SHORT" in action_name:
        num_repetitions = 10
    elif "LONG" in action_name:
        num_repetitions = 20
    else:
        raise ValueError("ACTION NAME is WRONG")

    # Get Movement type:
    if "MOVE" in action_name:
        action = MOVE_TO
    elif "DRIBBLE" in action_name:
        action = DRIBBLE_TO
    else:
        raise ValueError("ACTION NAME is WRONG")

    if "UP" in action_name:
        action = (action, features.agent_coord[0], -0.9)
    elif "DOWN" in action_name:
        action = (action, features.agent_coord[0], 0.9)
    elif "LEFT" in action_name:
        action = (action, -0.8, features.agent_coord[1])
    elif "RIGHT" in action_name:
        action = (action, 0.8, features.agent_coord[1])
    else:
        raise ValueError("ACTION NAME is WRONG")

    attempts = 0
    while game_interface.in_game() and attempts < num_repetitions:
        status, observation = game_interface.step(action, features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
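
Note: both move_agent() variants above dispatch on substrings of action_name. Assuming the action set is the cross product of duration, movement type, and direction (the exact naming scheme is an assumption for illustration), the names the second variant accepts can be enumerated as:

from itertools import product

MOVE_ACTION_NAMES = [
    "{}_{}_{}".format(duration, movement, direction)
    for duration, movement, direction in product(
        ("SHORT", "LONG"), ("MOVE", "DRIBBLE"),
        ("UP", "DOWN", "LEFT", "RIGHT"))
]
# e.g. "SHORT_MOVE_UP", "LONG_DRIBBLE_LEFT", ...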
Example #5
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures, agent: QLearningAgentTest,
         actions: DiscreteActionsV5, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface that extracts the agent's main
    features from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the average reward
    """
    # Run training using Q-Learning
    sum_score = 0
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("testing; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            if not has_ball:
                hfo_action_params = GO_TO_BALL
                num_rep = 5
            else:
                # Act:
                debug_counter += 1
                action_idx = agent.exploit_actions(curr_state_id)
                hfo_action_params, num_rep = \
                    actions.map_action_idx_to_hfo_action(
                        agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                        action_idx=action_idx)

            # Step:
            status, observation = execute_action(
                action_params=hfo_action_params,
                repetitions=num_rep,
                has_ball=has_ball,
                game_interface=game_interface)

            # update features:
            reward = reward_funct(status)
            features.update_features(observation)
            sum_score += reward
        # Game Reset
        game_interface.reset()
    print("## AVR Test reward = ", sum_score / num_episodes)
    return sum_score / num_episodes
Example #6
def execute_action(action_params: tuple, repetitions: int,
                   game_interface: HFOAttackingPlayer, has_ball: bool):
    rep_counter_aux = 0
    observation = []
    while game_interface.in_game() and rep_counter_aux < repetitions:
        status, observation = game_interface.step(action_params, has_ball)
        rep_counter_aux += 1
    return game_interface.get_game_status(), observation
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface that extracts the agent's main
    features from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the win rate
    """
    # Run training using Q-Learning
    num_goals = 0
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # update features:
            reward = reward_funct(status)
        num_goals += 1 if reward == 1 else 0

        if status == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", (ep + 1))
    print("<<TEST>> AVR win rate = ", num_goals / (ep + 1))
    return num_goals / num_episodes
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgent, actions: DiscreteActions, reward_funct):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            action_idx = agent.act(curr_state_id)
            aux_actions_played.add(actions.map_action_to_str(action_idx))
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                features.get_pos_tuple(), action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_funct(status)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward
            aux_positions_names.add(features.get_position_name())

            # Update environment features:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()

            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}; positions: {}; actions: {}'.format(
            ep, agent.cum_reward, aux_positions_names, aux_actions_played))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=True, actions_name=actions_name)
def go_to_origin_position(game_interface: HFOAttackingPlayer,
                          features: DiscreteFeatures1TeammateV1,
                          actions: DiscreteActions1TeammateV1,
                          pos_name: str = None):
    if pos_name:
        origin_pos = ORIGIN_POSITIONS[pos_name]
    else:
        pos_name, origin_pos = random.choice(list(ORIGIN_POSITIONS.items()))
    # print("\nMoving to starting point: {0}".format(pos_name))
    pos = features.get_pos_tuple(round_ndigits=1)
    while origin_pos != pos:
        has_ball = features.has_ball()
        hfo_action: tuple = actions.dribble_to_pos(origin_pos)
        status, observation = game_interface.step(hfo_action, has_ball)
        features.update_features(observation)
        pos = features.get_pos_tuple(round_ndigits=1)
    # Informs the teammate that it is ready to start the game
    teammate_last_coord = features.teammate_coord.copy()
    counter = 0
    while teammate_last_coord.tolist() == features.teammate_coord.tolist():
        if counter >= 10:
            # print("STOP repeating the message")
            break
        game_interface.hfo.say(settings.PLAYER_READY_MSG)
        game_interface.hfo.step()
        observation = game_interface.hfo.getState()
        features.update_features(observation)
        # print("Action said READY!")
        counter += 1
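
Note: go_to_origin_position() keeps dribbling until the agent's rounded position equals a value from ORIGIN_POSITIONS, which is imported elsewhere. An illustrative (hypothetical) shape for that mapping, keeping every coordinate to one decimal place as Example #23's validity check requires:

# Names and coordinates are illustrative only; HFO field positions are
# normalized to roughly [-1, 1] on both axes.
ORIGIN_POSITIONS = {
    "TOP LEFT": (-0.4, -0.4),
    "TOP RIGHT": (0.4, -0.4),
    "MID LEFT": (-0.4, 0.0),
    "MID RIGHT": (0.4, 0.0),
    "BOTTOM LEFT": (-0.4, 0.4),
    "BOTTOM RIGHT": (0.4, 0.4),
}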
Example #10
 def __init__(self, num_opponents: int, num_teammates: int,
              port: int = 6000):
     # Game Interface:
     self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                              num_teammates=num_teammates,
                                              port=port)
     self.game_interface.connect_to_server()
     # Features Interface:
     self.features = DiscFeatures1Teammate(num_op=num_opponents,
                                           num_team=num_teammates)
     # Actions Interface:
     self.actions = Actions()
     # Agent instance:
     self.agent = QAgent(num_features=self.features.num_features,
                         num_actions=self.actions.get_num_actions(),
                         learning_rate=0.1, discount_factor=0.9, epsilon=0.8)
Example #11
 def kick_to_pos(self, pos: tuple, features: BaseHighLevelState,
                 game_interface: HFOAttackingPlayer):
     """ The agent kicks to position expected """
     hfo_action = (KICK_TO, pos[0], pos[1], 2)
     status, observation = game_interface.step(hfo_action,
                                               features.has_ball())
     # Update features:
     features.update_features(observation)
def pass_ball(game_interface: HFOAttackingPlayer,
              features: DiscreteFeatures1TeammateV1):
    # print("pass_ball!")
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 2:
            break
        elif attempts == 2:
            # Failed to pass 2 times
            print("Failed to PASS two times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (PASS, 11)
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
def shoot_ball(game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    # print("shoot_ball!")
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 3:
            break
        elif attempts == 3:
            # Failed to shoot three times: fall back to a direct kick
            print("Failed to SHOOT 3 times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (SHOOT, )
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
Example #14
 def shoot_ball(self, game_interface: HFOAttackingPlayer,
                features: DiscreteFeatures1Teammate):
     """ Tries to shoot, if it fail, kicks to goal randomly """
     attempts = 0
     while game_interface.in_game() and features.has_ball():
         if attempts > 3:
             break
         elif attempts == 3:
             # Failed to shoot three times: fall back to a direct kick
             # print("Failed to SHOOT 3 times. WILL KICK")
             y = random.choice([0.17, 0, -0.17])
             hfo_action = (KICK_TO, 0.9, y, 2)
         else:
             hfo_action = (SHOOT,)
         _, obs = game_interface.step(hfo_action, features.has_ball())
         features.update_features(obs)
         attempts += 1
     return game_interface.get_game_status(), \
         game_interface.get_observation_array()
Example #15
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: DiscreteHighLevelFeatures, agent: QLearningAgent,
          actions: ActionManager):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_function(status)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward

            # Update environment features:
            prev_state_id = curr_state_id
            curr_state_id = features.get_state_index(observation)

            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=True, actions_name=actions_name)
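
Note: both train() variants delegate the value update to agent.learn(prev_state_id, action_idx, reward, status, curr_state_id). A minimal sketch of the textbook tabular Q-learning rule this is expected to implement; alpha and gamma come from the agent's constructor, and treating "done" as a terminal status is an assumption about how the project uses the status argument:

import numpy as np

def q_learning_update(q_table: np.ndarray, prev_state: int, action: int,
                      reward: float, next_state: int, done: bool,
                      alpha: float = 0.1, gamma: float = 0.9) -> None:
    # Bootstrapped target: the immediate reward on terminal steps, otherwise
    # the reward plus the discounted best value of the next state.
    target = reward if done else (
        reward + gamma * float(np.max(q_table[next_state])))
    q_table[prev_state, action] += alpha * (target - q_table[prev_state, action])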
Example #16
 def pass_ball(self, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1Teammate):
     """ Tries to use the PASS action, if it fails, Kicks in the direction
     of the teammate"""
     attempts = 0
     while game_interface.in_game() and features.has_ball():
         if attempts > 2:
             break
         elif attempts == 2:
             # Failed to pass 2 times
             # print("Failed to PASS two times. WILL KICK")
             y = random.choice([0.17, 0, -0.17])
             hfo_action = (KICK_TO, 0.9, y, 2)
         else:
             hfo_action = (PASS, 11)
         _, obs = game_interface.step(hfo_action, features.has_ball())
         features.update_features(obs)
         attempts += 1
     return game_interface.get_game_status(), \
         game_interface.get_observation_array()
Example #17
 def dribble_to_pos(self, pos: tuple, features: DiscreteFeatures1Teammate,
                    game_interface: HFOAttackingPlayer):
     """ The agent keeps dribbling until reach the position expected """
     curr_pos = features.get_pos_tuple(round_ndigits=1)
     while pos != curr_pos:
         hfo_action = (DRIBBLE_TO, pos[0], pos[1])
         status, observation = game_interface.step(hfo_action,
                                                   features.has_ball())
         # Update features:
         features.update_features(observation)
         curr_pos = features.get_pos_tuple(round_ndigits=1)
Example #18
 def move_to_pos(self, pos: tuple, features: BaseHighLevelState,
                 game_interface: HFOAttackingPlayer):
     """ The agent keeps moving until reach the position expected """
     curr_pos = features.get_pos_tuple(round_ndigits=1)
     while pos != curr_pos:
         hfo_action = (MOVE_TO, pos[0], pos[1])
         status, observation = game_interface.step(hfo_action,
                                                   features.has_ball())
         # Update features:
         features.update_features(observation)
         curr_pos = features.get_pos_tuple(round_ndigits=1)
Example #19
 def shoot_ball(self, game_interface: HFOAttackingPlayer,
                features: BaseHighLevelState):
     """ Tries to shoot, if it fail, kicks to goal randomly """
     # Get best shoot angle:
     angles = []
     goalie_coord = np.array([features.opponents[0].x_pos,
                              features.opponents[0].y_pos])
     player_coord = np.array(features.get_pos_tuple())
     for goal_pos in self.shoot_possible_coord:
         angles.append(get_angle(goalie=goalie_coord, player=player_coord,
                                 point=goal_pos))
     idx = int(np.argmax(np.array(angles)))
     best_shoot_coord = self.shoot_possible_coord[idx]
     # Action parameters:
     hfo_action = (KICK_TO, best_shoot_coord[0], best_shoot_coord[1], 2.5)
     # Step game:
     _, obs = game_interface.step(hfo_action, features.has_ball())
     # Update features:
     features.update_features(obs)
     return game_interface.get_game_status(), \
         game_interface.get_observation_array()
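
Note: shoot_ball() above picks, among self.shoot_possible_coord, the goal point whose get_angle(...) is largest. That helper is not shown; a plausible sketch, under the assumption that the angle is measured at the player between the goalie and the candidate goal point (a wider angle meaning the shot path passes further from the goalie):

import numpy as np

def get_angle(goalie: np.ndarray, player: np.ndarray, point) -> float:
    # Hypothetical helper: angle in degrees, at the player's position, between
    # the direction to the goalie and the direction to the candidate point.
    to_goalie = goalie - player
    to_point = np.asarray(point, dtype=float) - player
    cos = np.dot(to_goalie, to_point) / (
        np.linalg.norm(to_goalie) * np.linalg.norm(to_point) + 1e-8)
    return float(np.degrees(np.arccos(np.clip(cos, -1.0, 1.0))))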
Example #20
def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: discrete_features_v2.DiscreteFeaturesV2,
         agent: QLearningAgentV4, actions: DiscreteActionsV2, reward_funct):
    # Run training using Q-Learning
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        prev_state_id = -1
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            if prev_state_id != curr_state_id:
                print([round(val, 2) for val in agent.q_table[curr_state_id]])
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                action_idx=action_idx)
            
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            prev_state_id = curr_state_id
            
            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_funct(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [actions.map_action_to_str(i, has_ball=True) for i in
                    range(agent.num_actions)]
    agent.export_metrics(training=False, actions_name=actions_name)
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: discrete_features_v2.DiscreteFeaturesV2,
         agent: QLearningAgentV5, actions: DiscreteActionsV5, reward_funct):
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface that extracts the agent's main
    features from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the average reward
    """
    # Run training using Q-Learning
    sum_score = 0
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action_params, num_rep = \
                actions.map_action_idx_to_hfo_action(
                    agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                    action_idx=action_idx)

            action_name = actions.map_action_to_str(action_idx, has_ball)

            # Step:
            rep_counter_aux = 0
            while game_interface.in_game() and rep_counter_aux < num_rep:
                status, observation = game_interface.step(
                    hfo_action_params, has_ball)
                rep_counter_aux += 1
            reward = reward_funct(status)

            # update features:
            features.update_features(observation)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            sum_score += reward

        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    return sum_score / num_episodes
Example #22
def go_to_origin_position(game_interface: HFOAttackingPlayer,
                          features: DiscreteFeatures,
                          actions: DiscreteActionsV5,
                          random_start: bool = True):
    if random_start:
        pos_name, origin_pos = random.choice(list(ORIGIN_POSITIONS.items()))
    else:
        pos_name = "Fixed start"
        origin_pos = features.get_pos_tuple()
    print("Moving to starting point: {0}".format(pos_name))
    pos = features.get_pos_tuple(round_ndigits=1)
    while origin_pos != pos:
        has_ball = features.has_ball()
        hfo_action: tuple = actions.dribble_to_pos(origin_pos)
        status, observation = game_interface.step(hfo_action, has_ball)
        features.update_features(observation)
        pos = features.get_pos_tuple(round_ndigits=1)
Example #23
    def dribble_to_pos(self, pos: tuple, features: BaseHighLevelState,
                       game_interface: HFOAttackingPlayer):
        """ The agent keeps dribbling until reach the position expected """
        def check_valid_pos(pos_tuple: tuple):
            for pos_aux in pos_tuple:
                try:
                    num_digits = len(str(pos_aux).split(".")[1])
                    if num_digits >= 2:
                        return False
                except IndexError:
                    pass
            return True

        if check_valid_pos(pos) is False:
            raise Exception("Initial positions invalid. Initial positions "
                            "should be a float with 1 digit or less")
        curr_pos = features.get_pos_tuple(round_ndigits=1)
        while pos != curr_pos:
            hfo_action = (DRIBBLE_TO, pos[0], pos[1])
            status, observation = game_interface.step(hfo_action,
                                                      features.has_ball())
            # Update features:
            features.update_features(observation)
            curr_pos = features.get_pos_tuple(round_ndigits=1)
Example #24
 def do_nothing(self, game_interface: HFOAttackingPlayer,
                features: DiscreteFeatures1Teammate):
     action = (NOOP,)
     status, observation = game_interface.step(action, features.has_ball())
     return status, observation
Example #25
class Player:
    def __init__(self, num_opponents: int, num_teammates: int):
        # Game Interface:
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = DiscreteFeatures1Teammate(num_op=num_opponents,
                                                  num_team=num_teammates)
        # Actions Interface:
        self.actions = DiscreteActionsModule()
        # Agent instance:
        self.agent = QAgent(num_features=self.features.num_features,
                            num_actions=self.actions.get_num_actions(),
                            learning_rate=0.1,
                            discount_factor=0.9,
                            epsilon=1,
                            final_epsilon=0.3)

    def get_reward(self, status: int) -> int:
        return basic_reward(status)

    def set_starting_game_conditions(self,
                                     game_interface: HFOAttackingPlayer,
                                     features: DiscreteFeatures1Teammate,
                                     start_with_ball: bool = True,
                                     start_pos: tuple = None):
        """
        Set starting game conditions. Move for initial position, for example
        """
        if not start_pos:
            pos_name, start_pos = random.choice(
                list(STARTING_POSITIONS.items()))
        if start_with_ball:
            # Move to starting position:
            self.actions.dribble_to_pos(start_pos, features, game_interface)
        else:
            if self.features.has_ball():
                self.actions.kick_to_pos((0, 0), features, game_interface)
            # Move to starting position:
            self.actions.move_to_pos(start_pos, features, game_interface)
        # Informs the other players that it is ready to start:
        game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def train(self,
              num_train_episodes: int,
              num_total_train_ep: int,
              start_with_ball: bool = True):
        """
        @param num_train_episodes: number of episodes to train in this iteration
        @param num_total_train_ep: number total of episodes to train
        @param start_with_ball: bool
        @raise ServerDownError
        @return: (QLearningAgentV5) the agent
        """
        # metrics variables:
        _num_wins = 0
        _sum_epsilons = 0
        for ep in range(num_train_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Go to origin position:
            self.set_starting_game_conditions(
                game_interface=self.game_interface,
                features=self.features,
                start_with_ball=start_with_ball)

            # Start learning loop
            goal = False  # bool flag
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features().copy()

                # Act:
                action_idx = self.agent.act(features_array)
                status = self.actions.execute_action(
                    action_idx=action_idx,
                    features=self.features,
                    game_interface=self.game_interface)

                # Every step we update replay memory and train main network
                done = not self.game_interface.in_game()
                goal = self.game_interface.scored_goal()
                self.agent.store_transition(
                    curr_st=features_array,
                    action_idx=action_idx,
                    reward=self.get_reward(status),
                    new_st=self.features.get_features(),
                    done=done)

            # Train
            self.agent.train(goal)
            # Update auxiliary variables:
            _sum_epsilons += self.agent.epsilon
            _num_wins += 1 if self.game_interface.scored_goal() else 0
            # Update Agent:
            self.agent.restart(num_total_train_ep)
            # Game Reset
            self.game_interface.reset()
        print("[TRAIN: Summary] WIN rate = {}; AVR epsilon = {}".format(
            _num_wins / num_train_episodes,
            _sum_epsilons / num_train_episodes))

    def test(self,
             num_episodes: int,
             start_with_ball: bool = True,
             training: bool = False) -> float:
        """
        @param num_episodes: number of episodes to run
        @param start_with_ball: flag
        @param training: flag
        @return: (float) the win rate
        """
        starting_pos_list = list(STARTING_POSITIONS.values())
        # metrics variables:
        _num_wins = 0
        for ep in range(num_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Set up gaming conditions:
            self.set_starting_game_conditions(
                game_interface=self.game_interface,
                features=self.features,
                start_pos=starting_pos_list[ep % len(starting_pos_list)],
                start_with_ball=start_with_ball)

            # Start learning loop
            prev_action_idx = None
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features().copy()

                # Act:
                action_idx = self.agent.exploit_actions(features_array)
                if prev_action_idx != action_idx and not training:
                    print("ACTION:: {}".format(
                        self.actions.map_action_to_str(
                            action_idx, self.features.has_ball())))
                prev_action_idx = action_idx

                self.actions.execute_action(action_idx=action_idx,
                                            features=self.features,
                                            game_interface=self.game_interface)

            # Update auxiliary variables:
            _num_wins += 1 if self.game_interface.scored_goal() else 0
            # Game Reset
            self.game_interface.reset()
        avr_win_rate = _num_wins / num_episodes
        print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
        return avr_win_rate
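
Note: a hypothetical driver for the Player class above, alternating training iterations with evaluation; the episode counts and number of iterations are illustrative only:

if __name__ == "__main__":
    player = Player(num_opponents=1, num_teammates=1)
    num_total_train_ep = 1000
    for iteration in range(10):
        player.train(num_train_episodes=num_total_train_ep // 10,
                     num_total_train_ep=num_total_train_ep,
                     start_with_ball=True)
        win_rate = player.test(num_episodes=25, start_with_ball=True)
        print("Iteration {}: win rate = {:.2f}".format(iteration, win_rate))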
Example #26
 def no_ball_action(self, game_interface: HFOAttackingPlayer,
                    features: BaseHighLevelState) -> int:
     action = (MOVE, )
     status, observation = game_interface.step(action, features.has_ball())
     features.update_features(observation)
     return status
Example #27
 def do_nothing(self, game_interface: HFOAttackingPlayer,
                features: BaseHighLevelState):
     action = (NOOP, )
     status, observation = game_interface.step(action, features.has_ball())
     return status, observation
Example #28
    parser.add_argument('--num_episodes', type=int, default=500)
    parser.add_argument('--save_file', type=str, default=None)

    args = parser.parse_args()
    agent_id = args.id
    num_team = args.num_teammates
    num_op = args.num_opponents
    num_episodes = args.num_episodes
    saving_file = args.save_file

    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={}; saveFile={};".format(agent_id, num_op, num_team,
                                                 num_episodes, saving_file))
    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(agent_id=agent_id,
                                       num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()

    # Reward Function
    reward_function = simple_reward

    # Get number of features and actions
    features_manager = DiscreteHighLevelFeatures(num_team, num_op)
    actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])

    # Initialize a Q-Learning Agent
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.99,
Example #29
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: number total of episodes to train
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface that extracts the agent's main
    features from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (QLearningAgentV5) the agent
    """
    sum_score = 0
    sum_epsilons = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # print("Agent playing {} for {}".format(action_name, num_rep))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Update environment features:
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id,
                           action_idx=action_idx,
                           reward=reward,
                           next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        if game_interface.get_game_status() == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    print("<<TRAIN>> %Explorations={}% ".format(
        round(
            (agent.counter_explorations /
             (agent.counter_exploitations + agent.counter_explorations)), 4) *
        100))
Example #30
    args = parser.parse_args()
    num_team = args.num_teammates
    num_op = args.num_opponents
    num_train_ep = args.num_train_ep
    num_test_ep = args.num_test_ep
    num_repetitions = args.num_repetitions
    num_episodes = (num_train_ep + num_test_ep) * num_repetitions
    # Load Model
    model_file = args.model_file
    # Directory
    save_dir = args.save_dir or mkdir(
        num_episodes, num_op, extra_note="retrain")

    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()
    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                    num_team, num_episodes))

    # Agent set-up
    reward_function = basic_reward
    features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
    actions_manager = DiscreteActions1TeammateV1()

    # Q Agent:
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.9,