Example #1
def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteHighLevelFeatures, agent: QLearningAgent,
         actions: ActionManager):
    # Evaluate the learned Q-table (exploitation only, no training)
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)

            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_function(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=False, actions_name=actions_name)
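Both `test` above and `train` below call `reward_function(status)` without defining it. A minimal sketch of such a helper, assuming a sparse reward keyed on HFO's episode-status constants; the exact weighting here is an illustration, not the repository's actual shaping:

from hfo import GOAL, IN_GAME

def reward_function(status) -> int:
    # Illustrative sparse reward: +1 for scoring, 0 while the episode is
    # still running, -1 for any other terminal status (out of bounds,
    # out of time, captured by defense, ...).
    if status == GOAL:
        return 1
    if status == IN_GAME:
        return 0
    return -1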
Example #2
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: DiscreteHighLevelFeatures, agent: QLearningAgent,
          actions: ActionManager):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_function(status)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward

            # Update environment features:
            prev_state_id = curr_state_id
            curr_state_id = features.get_state_index(observation)

            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=True, actions_name=actions_name)
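A hypothetical end-to-end driver for the two routines above. Every constructor call here is an assumption pieced together from Examples #4 and #5 (including the team sizes and the save-file name), and the player would still need a running HFO server to connect to:

NUM_EPISODES = 1000

game_interface = HFOAttackingPlayer()          # assumed no-arg constructor
features = DiscreteHighLevelFeatures(game_interface.get_state(),
                                     num_teammates=1, num_opponents=1)
actions = ActionManager()
agent = QLearningAgent(num_states=features.get_num_states(),
                       num_actions=actions.get_num_actions(),
                       learning_rate=0.1,
                       discount_factor=0.99,
                       epsilon=1.0,
                       num_games=NUM_EPISODES,
                       save_file='q_table.npy')  # hypothetical file name

train(num_episodes=NUM_EPISODES, game_interface=game_interface,
      features=features, agent=agent, actions=actions)
test(train_ep=NUM_EPISODES, num_episodes=100, game_interface=game_interface,
     features=features, agent=agent, actions=actions)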
Example #3
actions = Action()
agent = QLearner(num_states=env.get_num_states(),
                 num_actions=actions.get_num_actions(),
                 epsilon=epsilon,
                 learning_rate=lr,
                 discount_factor=discount_factor,
                 save_file=save_file)
# Saving lists
scores = []
eps_history = []
for i in range(n_games):
    print("\n<< {}/{} Game >> eps={}".format(i, n_games, agent.epsilon))
    game_status = IN_GAME
    score = 0
    while game_status == IN_GAME:
        action_idx = agent.choose_action(env.get_state_index())
        hfo_action = actions.map_action(action_idx)
        val = hfo.act(hfo_action)
        if val is not None:
            print("Action returned " + str(val))
        game_status = hfo.step()
        if score == 0 and game_status == SERVER_DOWN:
            print("Trying to reconnect")
            hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET,
                                server_port=6000)
            print("|| Action: {}".format(hfo.actionToString(NOOP)))
            hfo.act(NOOP)
            game_status = hfo.step()
            print("|| New game status: {}".format(
                hfo.statusToString(game_status)))
            game_status = IN_GAME
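Examples #3 and #5 only call two methods on the `Action` helper: `get_num_actions` and `map_action`. A minimal sketch under the assumption of a fixed four-primitive action set (the real class may define a different or parameterized set):

from hfo import MOVE, SHOOT, PASS, DRIBBLE

class Action:
    # Hypothetical index-to-HFO-primitive mapping.
    _ACTIONS = [MOVE, SHOOT, PASS, DRIBBLE]

    def get_num_actions(self) -> int:
        return len(self._ACTIONS)

    def map_action(self, action_idx: int):
        return self._ACTIONS[action_idx]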
Example #4
    # Initialize a Q-Learning Agent
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.99,
                           epsilon=1.0,
                           num_games=num_episodes,
                           save_file=saving_file)

    # Run training using Q-Learning
    for i in range(num_episodes):
        print('\n=== Episode {}/{}:'.format(i, num_episodes))
        agent.reset(i)
        observation = hfo_interface.reset()
        # Update environment features:
        curr_state_id = features_manager.get_state_index(observation)
        has_ball = features_manager.has_ball(observation)

        while hfo_interface.in_game():
            action_idx = agent.act(curr_state_id)
            hfo_action = actions_manager.map_action(action_idx)

            status, observation = hfo_interface.step(hfo_action, has_ball)
            reward = reward_function(status)

            # Update environment features:
            prev_state_id = curr_state_id
            curr_state_id = features_manager.get_state_index(observation)
            has_ball = features_manager.has_ball(observation)

            # Update agent (same update call as in Example #2)
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
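Examples #2 and #4 rely on `agent.act` and `agent.learn` without showing their bodies. A minimal sketch of what a tabular implementation typically looks like: epsilon-greedy selection plus the standard Q-learning update. This illustrates the technique, not the repository's actual QLearningAgent:

import numpy as np
from hfo import IN_GAME

class TabularQAgent:
    def __init__(self, num_states: int, num_actions: int,
                 learning_rate: float = 0.1, discount_factor: float = 0.99,
                 epsilon: float = 1.0):
        self.q_table = np.zeros((num_states, num_actions))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon

    def act(self, state_id: int) -> int:
        # Epsilon-greedy: explore with probability epsilon, else exploit.
        if np.random.random() < self.epsilon:
            return np.random.randint(self.q_table.shape[1])
        return int(np.argmax(self.q_table[state_id]))

    def learn(self, prev_state: int, action: int, reward: float,
              status, curr_state: int) -> None:
        # Standard Q-learning target: bootstrap from max_a' Q(s', a')
        # unless the episode ended with this transition.
        target = reward
        if status == IN_GAME:
            target += self.gamma * np.max(self.q_table[curr_state])
        td_error = target - self.q_table[prev_state, action]
        self.q_table[prev_state, action] += self.lr * td_error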
Example #5
hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET, server_port=6000)
env = DiscreteHighLevelFeatures(hfo.getState(), num_teammates,
                                num_opponents)
actions = Action()
agent = QLearner(num_states=env.get_num_states(),
                 num_actions=actions.get_num_actions())
agent.load_q_table(load_file)
# Saving lists
scores = []
eps_history = []
for i in range(n_games):
    print("\n<< {}/{} Game >> eps={}".format(i, n_games, agent.epsilon))
    game_status = IN_GAME
    score = 0
    while game_status == IN_GAME:
        action_idx = agent.exploit_action(env.get_state_index())
        hfo_action = actions.map_action(action_idx)
        hfo.act(hfo_action)
        game_status = hfo.step()
        if game_status == SERVER_DOWN:
            hfo.act(QUIT)
            break
        # Reward:
        reward = env.get_reward(game_status, env.agent.can_kick, hfo_action)
        print("|| Action: {} > {} points".format(
            hfo.actionToString(hfo_action),
            reward))
        score += reward
        # Environment:
        new_env = DiscreteHighLevelFeatures(hfo.getState(), num_teammates,
                                            num_opponents)