Ejemplo n.º 1
0
    def move_agent(self, action_name, game_interface: HFOAttackingPlayer,
                   features: BaseHighLevelState):
        """Dribble towards a fixed target chosen by the direction keyword.

        The direction is found by substring match on ``action_name``, tested
        in the order UP, DOWN, LEFT, RIGHT. UP and DOWN keep the agent's
        current x and aim at y = -0.9 and y = 0.9 respectively; LEFT and
        RIGHT keep the current y and aim at x = -0.8 and x = 0.8. The
        resulting DRIBBLE_TO command is stepped while
        ``game_interface.in_game()`` holds, for at most
        ``self.action_num_episodes`` steps, and ``features`` is refreshed
        from the observation after every step.

        @param action_name: name containing one of the direction keywords
        @param game_interface: interface used to step the game
        @param features: features object; read for the agent position and
        ball possession, and updated in place
        @return: tuple (game status, observation array), both read from
        game_interface after the last step
        @raise ValueError: if action_name contains no direction keyword
        """
        move_kind = DRIBBLE_TO

        # Lambdas defer reading the agent position until a keyword matched,
        # so nothing is read from features when the name is invalid.
        target_by_keyword = (
            ("UP", lambda: (features.agent.x_pos, -0.9)),
            ("DOWN", lambda: (features.agent.x_pos, 0.9)),
            ("LEFT", lambda: (-0.8, features.agent.y_pos)),
            ("RIGHT", lambda: (0.8, features.agent.y_pos)),
        )
        for keyword, build_target in target_by_keyword:
            if keyword in action_name:
                command = (move_kind,) + build_target()
                break
        else:
            raise ValueError("ACTION NAME is WRONG")

        steps_played = 0
        while True:
            if not game_interface.in_game():
                break
            if steps_played >= self.action_num_episodes:
                break
            _, latest_obs = game_interface.step(command, features.has_ball())
            features.update_features(latest_obs)
            steps_played += 1
        return (game_interface.get_game_status(),
                game_interface.get_observation_array())
Ejemplo n.º 2
0
def execute_action(action_params: tuple, repetitions: int,
                   game_interface: HFOAttackingPlayer, has_ball: bool):
    """Step the same action repeatedly and report the final outcome.

    The action is stepped while ``game_interface.in_game()`` holds, for at
    most ``repetitions`` steps. The same ``has_ball`` value is passed to
    every step.

    @param action_params: action tuple handed to game_interface.step
    @param repetitions: maximum number of steps to play
    @param game_interface: interface used to step the game
    @param has_ball: flag forwarded unchanged to every step
    @return: tuple (game status, observation); the status is read from
    game_interface after the loop, and the observation is the one returned
    by the last step, or an empty list when no step was played
    """
    last_obs = []
    steps_done = 0
    while True:
        if not game_interface.in_game():
            break
        if steps_done >= repetitions:
            break
        _, last_obs = game_interface.step(action_params, has_ball)
        steps_done += 1
    return game_interface.get_game_status(), last_obs
Ejemplo n.º 3
0
 def shoot_ball(self, game_interface: HFOAttackingPlayer,
                features: DiscreteFeatures1Teammate):
     """Try SHOOT up to three times, then fall back to one KICK_TO.

     Steps are played only while the game is running and the agent still
     has the ball. The first three steps use SHOOT; if the agent still has
     the ball, a fourth step uses KICK_TO towards x = 0.9 with y drawn at
     random from 0.17, 0 and -0.17. At most four steps are played, and
     ``features`` is refreshed from the observation after each one.

     @param game_interface: interface used to step the game
     @param features: features object; read for ball possession and
     updated in place
     @return: tuple (game status, observation array), both read from
     game_interface after the last step
     """
     tries = 0
     while (game_interface.in_game() and features.has_ball()
            and tries <= 3):
         if tries < 3:
             command = (SHOOT,)
         else:
             # Three SHOOT attempts did not get rid of the ball.
             # NOTE(review): the last KICK_TO value (2) is presumably the
             # kick speed -- confirm against the HFO action reference.
             command = (KICK_TO, 0.9, random.choice([0.17, 0, -0.17]), 2)
         _, latest_obs = game_interface.step(command, features.has_ball())
         features.update_features(latest_obs)
         tries += 1
     return (game_interface.get_game_status(),
             game_interface.get_observation_array())
Ejemplo n.º 4
0
 def pass_ball(self, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1Teammate):
     """Try PASS up to two times, then fall back to one KICK_TO.

     Steps are played only while the game is running and the agent still
     has the ball. The first two steps use PASS; if the agent still has
     the ball, a third step uses KICK_TO towards x = 0.9 with y drawn at
     random from 0.17, 0 and -0.17. At most three steps are played, and
     ``features`` is refreshed from the observation after each one.

     @param game_interface: interface used to step the game
     @param features: features object; read for ball possession and
     updated in place
     @return: tuple (game status, observation array), both read from
     game_interface after the last step
     """
     tries = 0
     while (game_interface.in_game() and features.has_ball()
            and tries <= 2):
         if tries < 2:
             # NOTE(review): 11 is presumably the teammate's uniform
             # number -- confirm against the team configuration.
             command = (PASS, 11)
         else:
             # Two PASS attempts did not get rid of the ball.
             command = (KICK_TO, 0.9, random.choice([0.17, 0, -0.17]), 2)
         _, latest_obs = game_interface.step(command, features.has_ball())
         features.update_features(latest_obs)
         tries += 1
     return (game_interface.get_game_status(),
             game_interface.get_observation_array())
Ejemplo n.º 5
0
 def shoot_ball(self, game_interface: HFOAttackingPlayer,
                features: BaseHighLevelState):
     """Kick once at the candidate target with the highest angle value.

     ``get_angle`` is evaluated for every entry of
     ``self.shoot_possible_coord`` using the position of the first opponent
     (presumably the goalie -- confirm) and the agent position. The entry
     with the largest value is used as the target of a single KICK_TO step
     with last parameter 2.5, after which ``features`` is refreshed from
     the observation.

     @param game_interface: interface used to step the game
     @param features: features object; read for the opponent and agent
     positions and ball possession, and updated in place
     @return: tuple (game status, observation array), both read from
     game_interface after the step
     """
     goalie_xy = np.array([features.opponents[0].x_pos,
                           features.opponents[0].y_pos])
     agent_xy = np.array(features.get_pos_tuple())

     # One angle value per candidate target, in candidate order.
     angle_scores = [
         get_angle(goalie=goalie_xy, player=agent_xy, point=candidate)
         for candidate in self.shoot_possible_coord
     ]
     best_idx = int(np.argmax(np.array(angle_scores)))
     target = self.shoot_possible_coord[best_idx]

     command = (KICK_TO, target[0], target[1], 2.5)
     _, latest_obs = game_interface.step(command, features.has_ball())
     features.update_features(latest_obs)
     return (game_interface.get_game_status(),
             game_interface.get_observation_array())
Ejemplo n.º 6
0
    def move_agent(self, action_name, game_interface: HFOAttackingPlayer,
                   features: DiscFeatures1Teammate):
        """Move or dribble in the direction and for the duration named.

        ``action_name`` is inspected by substring match for three things, in
        this order:
          * duration: SHORT (10 steps) or LONG (20 steps);
          * movement type: MOVE (MOVE_TO) or DRIBBLE (DRIBBLE_TO);
          * direction: UP, DOWN, LEFT or RIGHT.
        UP and DOWN keep the agent's current x and aim at y = -0.9 and
        y = 0.9; LEFT and RIGHT keep the current y and aim at x = -0.8 and
        x = 0.8. The command is stepped while ``game_interface.in_game()``
        holds, up to the chosen number of steps, and ``features`` is
        refreshed from the observation after every step.

        @param action_name: name containing the three keywords
        @param game_interface: interface used to step the game
        @param features: features object; read for the agent coordinates
        and ball possession, and updated in place
        @return: tuple (game status, observation array), both read from
        game_interface after the last step
        @raise ValueError: if any of the three keywords is missing
        """
        # Duration keyword -> number of steps.
        for keyword, steps in (("SHORT", 10), ("LONG", 20)):
            if keyword in action_name:
                num_repetitions = steps
                break
        else:
            raise ValueError("ACTION NAME is WRONG")

        # Movement type.
        if "MOVE" in action_name:
            move_kind = MOVE_TO
        elif "DRIBBLE" in action_name:
            move_kind = DRIBBLE_TO
        else:
            raise ValueError("ACTION NAME is WRONG")

        # Lambdas defer reading the agent coordinates until a direction
        # keyword matched.
        target_by_keyword = (
            ("UP", lambda: (features.agent_coord[0], -0.9)),
            ("DOWN", lambda: (features.agent_coord[0], 0.9)),
            ("LEFT", lambda: (-0.8, features.agent_coord[1])),
            ("RIGHT", lambda: (0.8, features.agent_coord[1])),
        )
        for keyword, build_target in target_by_keyword:
            if keyword in action_name:
                command = (move_kind,) + build_target()
                break
        else:
            raise ValueError("ACTION NAME is WRONG")

        steps_played = 0
        while True:
            if not game_interface.in_game():
                break
            if steps_played >= num_repetitions:
                break
            _, latest_obs = game_interface.step(command, features.has_ball())
            features.update_features(latest_obs)
            steps_played += 1
        return (game_interface.get_game_status(),
                game_interface.get_observation_array())
Ejemplo n.º 7
0
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    Train the agent for a batch of episodes and print summary statistics.

    The agent's exploration/exploitation counters are reset first. Each
    episode moves the agent to an origin position, then repeatedly lets the
    agent choose an action for the current state, executes it, converts the
    resulting status into a reward and stores the transition. When the
    episode ends the agent learns from its buffer, its hyper-parameters are
    updated and the game is reset. Finally the average reward per episode
    and the percentage of explorations are printed; both are reported as
    0.0 when there is nothing to average over.

    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: number total of episodes to train; forwarded
    to agent.update_hyper_parameters
    @param game_interface: game interface, that manages interactions
    with the game server;
    @param features: features interface, from the observation array, gets
    the main features for the agent;
    @param agent: learning agent; modified in place
    @param actions: actions interface;
    @param reward_funct: reward function used; called with the status
    returned by execute_action
    @return: None; the trained state lives in ``agent``
    @raise ServerDownError: if the server reports SERVER_DOWN at the start
    of an episode
    @raise NoActionPlayedError: if an episode ends OUT_OF_TIME after the
    agent chose fewer than 5 actions
    """
    sum_score = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # State and ball possession before acting:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Reward and resulting state; has_ball is the value from
            # before the action was executed.
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id,
                           action_idx=action_idx,
                           reward=reward,
                           next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        # An episode that timed out with almost no decisions is treated as
        # an error rather than as training data.
        if (game_interface.get_game_status() == OUT_OF_TIME
                and debug_counter < 5):
            raise NoActionPlayedError(
                "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        # Game Reset
        game_interface.reset()

    # Guard both averages: with zero episodes, or an agent that recorded no
    # decisions, the divisions below would raise ZeroDivisionError.
    avg_reward = sum_score / num_train_episodes if num_train_episodes else 0.0
    print("<<TRAIN>> AVR reward = ", avg_reward)
    num_choices = agent.counter_exploitations + agent.counter_explorations
    if num_choices:
        # Scale before rounding so no float artefacts reach the output.
        exploration_pct = round(
            100 * agent.counter_explorations / num_choices, 2)
    else:
        exploration_pct = 0.0
    print("<<TRAIN>> %Explorations={}% ".format(exploration_pct))
Ejemplo n.º 8
0
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    Run evaluation episodes with the agent and report the goal statistics.

    Each episode starts from one of the ORIGIN_POSITIONS names; the names
    are consumed one per episode and the list is refilled once exhausted.
    The agent then chooses and executes actions until the episode ends. An
    episode counts as a goal when the reward of its last action equals 1.
    If the server reports SERVER_DOWN the evaluation stops early.

    @param num_episodes: number of episodes to run
    @param game_interface: game interface, that manages interactions
    with the game server;
    @param features: features interface, from the observation array, gets the
    main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used; called with the status
    returned by execute_action
    @return: (float) the win rate, computed as goals / num_episodes (0.0
    when num_episodes is 0). The printed win rate is computed over the
    episodes actually played, so the two differ if the server went down.
    @raise NoActionPlayedError: if an episode ends OUT_OF_TIME after the
    agent chose fewer than 5 actions
    """
    num_goals = 0
    episodes_played = 0
    start_pos_list = list(ORIGIN_POSITIONS.keys())
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        # Set game start condition:
        if not start_pos_list:
            start_pos_list = list(ORIGIN_POSITIONS.keys())
        start_pos = start_pos_list.pop()
        go_to_origin_position(game_interface=game_interface,
                              pos_name=start_pos,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        # Reset per episode: if the loop below never runs, the episode must
        # neither fail on an unbound name nor reuse the previous episode's
        # reward and count its goal a second time.
        reward = None
        while game_interface.in_game():
            # State and ball possession before acting:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Reward of the action just played:
            reward = reward_funct(status)
        if reward == 1:
            num_goals += 1

        # An episode that timed out with almost no decisions is treated as
        # an error rather than as a valid test result.
        if (game_interface.get_game_status() == OUT_OF_TIME
                and debug_counter < 5):
            raise NoActionPlayedError(
                "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()
        episodes_played += 1

    # Count only completed episodes: the loop variable is unbound when no
    # episode ran and is one too high after a server-down break.
    played_win_rate = num_goals / episodes_played if episodes_played else 0.0
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", episodes_played)
    print("<<TEST>> AVR win rate = ", played_win_rate)
    return num_goals / num_episodes if num_episodes else 0.0