Example #1
def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteHighLevelFeatures, agent: QLearningAgent,
         actions: ActionManager):
    # Evaluate the greedy policy (no exploration, no learning updates)
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)

            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_function(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=False, actions_name=actions_name)
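
Both loops call a module-level reward_function(status) that never appears in these excerpts; Examples #4 and #5 bind it to simple_reward. A minimal sketch of what such a function could look like, assuming the usual HFO end-of-episode status codes; the constant values and reward magnitudes below are placeholders, not the original implementation.

# Hypothetical stand-ins for the hfo status constants; values are
# illustrative only, not the real ones from the hfo package.
IN_GAME, GOAL, CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME = range(5)

def simple_reward(status: int) -> int:
    # Sparse terminal reward: +1 on a goal, -1 when the ball is lost or
    # the episode times out, 0 while play continues.
    if status == GOAL:
        return 1
    if status in (CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME):
        return -1
    return 0
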
Example #2
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: DiscreteHighLevelFeatures, agent: QLearningAgent,
          actions: ActionManager):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_function(status)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward

            # Update environment features:
            prev_state_id = curr_state_id
            curr_state_id = features.get_state_index(observation)

            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [
        actions.map_action_to_str(i) for i in range(agent.num_actions)
    ]
    agent.export_metrics(training=True, actions_name=actions_name)
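
All of the actual learning happens inside agent.learn(prev_state_id, action_idx, reward, status, curr_state_id). A sketch of the standard tabular Q-learning update such an agent presumably performs; the class name, the IN_GAME placeholder, and the exact terminal-state handling are assumptions, not code from these excerpts.

import numpy as np

IN_GAME = 0  # hypothetical stand-in for the hfo IN_GAME constant

class TabularQSketch:
    def __init__(self, num_states, num_actions,
                 learning_rate=0.1, discount_factor=0.99):
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.q_table = np.zeros((num_states, num_actions))

    def learn(self, prev_state, action, reward, status, next_state):
        # Bootstrap from the best next-state action while the episode is
        # still running; a terminal status contributes only its reward.
        if status == IN_GAME:
            target = reward + self.discount_factor * self.q_table[next_state].max()
        else:
            target = reward
        td_error = target - self.q_table[prev_state, action]
        self.q_table[prev_state, action] += self.learning_rate * td_error
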
Example #3
parser.add_argument('--numTeammates', type=int, default=0)
parser.add_argument('--numOpponents', type=int, default=1)
parser.add_argument('--numEpisodes', type=int, default=1)
parser.add_argument('--saveFile', type=str, default="q_agent.model")
args = parser.parse_args()
num_teammates = args.numTeammates
num_opponents = args.numOpponents
n_games = args.numEpisodes
save_file = args.saveFile
lr = 0.01
epsilon = 1
discount_factor = 0.9
# Useful Instances:
hfo = HFOEnvironment()
hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET, server_port=6000)
env = DiscreteHighLevelFeatures(hfo.getState(), num_teammates,
                                num_opponents)
actions = Action()
agent = QLearner(num_states=env.get_num_states(),
                 num_actions=actions.get_num_actions(),
                 epsilon=epsilon,
                 learning_rate=lr,
                 discount_factor=discount_factor,
                 save_file=save_file)
# Saving lists
scores = []
eps_history = []
for i in range(n_games):
    print("\n<< {}/{} Game >> eps={}".format(i, n_games, agent.epsilon))
    game_status = IN_GAME
    score = 0
    while game_status == IN_GAME:
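
Example #2 selects actions with agent.act(...) and decays exploration with agent.update_hyper_parameters(), while Example #1 evaluates with agent.exploit_actions(...); together with epsilon_dec=0.9992 in Example #4 this points to epsilon-greedy selection with multiplicative decay. A sketch under those assumptions; epsilon_min is an extra knob that does not appear in the excerpts.

import random
import numpy as np

class EpsilonGreedySketch:
    def __init__(self, q_table, epsilon=1.0, epsilon_dec=0.9992,
                 epsilon_min=0.05):
        self.q_table = q_table
        self.epsilon = epsilon
        self.epsilon_dec = epsilon_dec
        self.epsilon_min = epsilon_min

    def act(self, state):
        # Explore with probability epsilon, otherwise exploit.
        if random.random() < self.epsilon:
            return random.randrange(self.q_table.shape[1])
        return self.exploit_actions(state)

    def exploit_actions(self, state):
        # Greedy choice used during evaluation (Example #1).
        return int(np.argmax(self.q_table[state]))

    def update_hyper_parameters(self):
        # Multiplicative decay toward a floor, called once per episode.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_dec)
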
Example #4
    train_mode = args.train_mode
    num_train_ep = args.num_train_ep
    num_test_ep = args.num_test_ep
    num_episodes = num_train_ep + num_test_ep

    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(agent_id, num_op, num_team, num_episodes))
    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(agent_id=agent_id,
                                       num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()

    # Agent set-up
    reward_function = simple_reward
    features_manager = DiscreteHighLevelFeatures(num_team, num_op)
    actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.99,
                           epsilon=1.0,
                           epsilon_dec=0.9992)

    # Run training using Q-Learning
    if train_mode == "train_only":
        print('\n=== Train Mode for {} episodes:'.format(num_train_ep))
        train(num_episodes=num_train_ep,
              game_interface=hfo_interface,
              features=features_manager,
              agent=agent,
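
ActionManager([SHOOT, MOVE, DRIBBLE]) suggests a thin mapping between the agent's discrete action indices and HFO's action codes. One possible shape for it, sketched with placeholder constant values rather than the real hfo ones.

# Hypothetical stand-ins for the hfo action constants; real values differ.
SHOOT, MOVE, DRIBBLE = 0, 1, 2

class ActionManagerSketch:
    """Maps discrete action indices onto HFO action codes."""

    def __init__(self, actions):
        self.actions = list(actions)
        self.names = {SHOOT: "SHOOT", MOVE: "MOVE", DRIBBLE: "DRIBBLE"}

    def get_num_actions(self):
        return len(self.actions)

    def map_action(self, index):
        # Index chosen by the agent -> HFO action code.
        return self.actions[index]

    def map_action_to_str(self, index):
        # Used when exporting per-action metrics (Examples #1 and #2).
        return self.names.get(self.actions[index], str(self.actions[index]))
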
Example #5
    saving_file = args.save_file

    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={}; saveFile={};".format(agent_id, num_op, num_team,
                                                 num_episodes, saving_file))
    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(agent_id=agent_id,
                                       num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()

    # Reward Function
    reward_function = simple_reward

    # Get number of features and actions
    features_manager = DiscreteHighLevelFeatures(num_team, num_op)
    actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])

    # Initialize a Q-Learning Agent
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.99,
                           epsilon=1.0,
                           num_games=num_episodes,
                           save_file=saving_file)

    # Run training using Q-Learning
    for i in range(num_episodes):
        print('\n=== Episode {}/{}:'.format(i, num_episodes))
        agent.reset(i)
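
agent.save_model() together with the --saveFile default "q_agent.model" implies the Q-table is persisted between runs. A straightforward way to do that with pickle; the function names and on-disk format here are assumptions, not the serialization these excerpts actually use.

import os
import pickle
import numpy as np

def save_model(q_table, save_file):
    # Persist the Q-table so a later run can resume training or evaluate.
    with open(save_file, "wb") as fp:
        pickle.dump(q_table, fp)

def load_model(save_file, num_states, num_actions):
    # Start from a fresh table when no saved model exists yet.
    if not os.path.exists(save_file):
        return np.zeros((num_states, num_actions))
    with open(save_file, "rb") as fp:
        return pickle.load(fp)
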
Example #6
    args = parser.parse_args()
    port = args.port

    hfo = HFOEnvironment()
    hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET,
                        server_port=port,
                        config_dir=CONFIG_DIR)
    print("Connected")
    for i in range(1):
        status = IN_GAME
        score = 0
        NUM_TEAMMATES = 0
        NUM_OPPONENTS = 2
        observation = hfo.getState()
        env = DiscreteHighLevelFeatures(num_team=NUM_TEAMMATES,
                                        num_op=NUM_OPPONENTS)
        ep_counter = 0
        while status == IN_GAME:
            hfo.act(DRIBBLE)
            # check game status:
            ep_counter += 1
            status = hfo.step()
            env.get_features(hfo.getState())
            print("OP: ", env.agent.proximity_op)
            if status == SERVER_DOWN:
                hfo.act(QUIT)
                break
        """
            
            if bool(hfo_env.has_ball(observation)) is False:
                hfo.act(MOVE)
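
The disabled branch above checks hfo_env.has_ball(observation). With HFO's high-level feature set this is typically read off a single "able to kick" entry of the observation vector; a sketch assuming that entry sits at index 5 (verify the layout against the HFO manual for your version).

import numpy as np

def has_ball(observation: np.ndarray) -> bool:
    # The high-level feature set exposes an "able to kick" flag that is
    # positive when the agent can kick the ball. Index 5 is an assumption.
    return observation[5] > 0
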