Example #1
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface; extracts the relevant features for
    the agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the win rate
    """
    # Run test episodes with the current Q-Learning policy
    num_goals = 0
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        # Guard against an episode that ends before any action is taken:
        status = IN_GAME
        reward = 0
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Compute reward from the game status:
            reward = reward_funct(status)
        # A final-step reward of 1 corresponds to a scored goal:
        num_goals += 1 if reward == 1 else 0

        if status == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", (ep + 1))
    print("<<TEST>> AVR win rate = ", num_goals / (ep + 1))
    return num_goals / num_episodes
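Below is a minimal sketch of how `test` might be wired up. The constructor
calls, the `connect_to_server`, `get_num_states` and `get_num_actions` methods
and the `basic_reward` function are assumptions for illustration only; they are
not part of the snippets above.

# Hypothetical setup; all constructor signatures below are assumptions.
game_interface = HFOAttackingPlayer()
game_interface.connect_to_server()
features = DiscreteFeatures1TeammateV1()
actions = DiscreteActions1TeammateV1()
agent = QLearningAgent(num_states=features.get_num_states(),
                       num_actions=actions.get_num_actions())

# Run the evaluation episodes and report the resulting win rate:
win_rate = test(num_episodes=50,
                game_interface=game_interface,
                features=features,
                agent=agent,
                actions=actions,
                reward_funct=basic_reward)
print("Win rate over 50 test episodes:", win_rate)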
Example #2
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: number total of episodes to train
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface; extracts the relevant features for
    the agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (QLearningAgent) the trained agent
    """
    sum_score = 0
    sum_epsilons = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # print("Agent playing {} for {}".format(action_name, num_rep))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Compute reward, read the new state and store the transition:
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id,
                           action_idx=action_idx,
                           reward=reward,
                           next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        if game_interface.get_game_status() == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    print("<<TRAIN>> %Explorations={}% ".format(
        round(
            (agent.counter_explorations /
             (agent.counter_exploitations + agent.counter_explorations)), 4) *
        100))
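In practice the two functions are alternated: train on a batch of episodes,
then measure the win rate with `test`. Below is a minimal sketch of such a
schedule, reusing the objects from the previous sketch; the batch sizes and the
overall structure are assumptions, not the original training script.

# Hypothetical training schedule; all numbers are illustrative.
num_total_train_ep = 5000
train_batch = 500
test_episodes = 50

for iteration in range(num_total_train_ep // train_batch):
    # Train for one batch of episodes (epsilon is annealed inside train):
    train(num_train_episodes=train_batch,
          num_total_train_ep=num_total_train_ep,
          game_interface=game_interface,
          features=features,
          agent=agent,
          actions=actions,
          reward_funct=basic_reward)
    # Evaluate the current policy:
    win_rate = test(num_episodes=test_episodes,
                    game_interface=game_interface,
                    features=features,
                    agent=agent,
                    actions=actions,
                    reward_funct=basic_reward)
    print("Iteration {}: win rate = {}".format(iteration, win_rate))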