def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgentV4, actions: DiscreteActionsV2,
          reward_funct):
    for ep in range(num_episodes):
        # print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(),
                has_ball=has_ball,
                action_idx=action_idx)
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_funct(status)
            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward
            aux_positions_names.add(features.get_position_name())
            action_name = actions.map_action_to_str(action_idx, has_ball)
            aux_actions_played.add(action_name)
            # Update environment features:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()
            agent.store_ep(state_idx=prev_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=curr_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        agent.learn()
        # print(':: Episode: {}; reward: {}; epsilon: {}; positions: {}; '
        #       'actions: {}'.format(ep, agent.cum_reward, agent.epsilon,
        #                            aux_positions_names, aux_actions_played))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters(episode=ep,
                                      num_total_episodes=num_total_train_ep := num_episodes) if False else \
            agent.update_hyper_parameters(episode=ep,
                                          num_total_episodes=num_episodes)
        # Game Reset
        game_interface.reset()
    agent.save_model()
    # Name the actions via the actions interface passed in (not a module-level
    # actions_manager) so the function is self-contained:
    actions_name = [actions.map_action_to_str(i, has_ball=True)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=True, actions_name=actions_name)
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: discrete_features_v2.DiscreteFeaturesV2,
         agent: QLearningAgentV5, actions: DiscreteActionsV5,
         reward_funct):
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction
        between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
        agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the average reward
    """
    # Run test episodes with the greedy (exploit-only) policy:
    sum_score = 0
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action_params, num_rep = \
                actions.map_action_idx_to_hfo_action(
                    agent_pos=features.get_pos_tuple(),
                    has_ball=has_ball,
                    action_idx=action_idx)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # Step (repeat the low-level action num_rep times):
            rep_counter_aux = 0
            while game_interface.in_game() and rep_counter_aux < num_rep:
                status, observation = game_interface.step(
                    hfo_action_params, has_ball)
                rep_counter_aux += 1
            reward = reward_funct(status)
            # Update features:
            features.update_features(observation)
            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            sum_score += reward
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    return sum_score / num_episodes
def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: discrete_features_v2.DiscreteFeaturesV2,
         agent: QLearningAgentV4, actions: DiscreteActionsV2,
         reward_funct):
    # Evaluate the current Q-table with the greedy (exploit-only) policy:
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        prev_state_id = -1
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act (print the Q-values only when the state changed):
            if prev_state_id != curr_state_id:
                print([round(val, 2) for val in agent.q_table[curr_state_id]])
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(),
                has_ball=has_ball,
                action_idx=action_idx)
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            prev_state_id = curr_state_id
            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_funct(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [actions.map_action_to_str(i, has_ball=True)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=False, actions_name=actions_name)
def go_to_origin_position(game_interface: HFOAttackingPlayer,
                          features: discrete_features_v2.DiscreteFeaturesV2,
                          actions: DiscreteActionsV5,
                          random_start: bool = True):
    if random_start:
        pos_name, origin_pos = random.choice(list(ORIGIN_POSITIONS.items()))
    else:
        pos_name = "Fixed start"
        origin_pos = features.get_pos_tuple()
    print("\nMoving to starting point: {0}".format(pos_name))
    pos = features.get_pos_tuple(round_ndigits=1)
    while origin_pos != pos:
        has_ball = features.has_ball()
        hfo_action: tuple = actions.dribble_to_pos(origin_pos)
        status, observation = game_interface.step(hfo_action, has_ball)
        features.update_features(observation)
        pos = features.get_pos_tuple(round_ndigits=1)
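# A minimal sketch of the ORIGIN_POSITIONS structure that go_to_origin_position
# expects: a mapping from a readable name to an (x, y) starting position on the
# field, with coordinates rounded to one decimal so the loop above can match
# them against get_pos_tuple(round_ndigits=1). The names and coordinates below
# are illustrative assumptions, not the values actually used in this repo.
SKETCH_ORIGIN_POSITIONS = {
    "TOP LEFT": (-0.5, -0.7),
    "MID LEFT": (-0.5, 0.0),
    "BOTTOM LEFT": (-0.5, 0.7),
}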
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgentV5, actions: DiscreteActionsV5,
          save_metrics: bool, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: total number of episodes to train
    @param game_interface: game interface that manages the interaction
        between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
        agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param save_metrics: flag; if true, save the metrics;
    @param reward_funct: reward function used
    @return: (QLearningAgentV5) the agent
    """
    for ep in range(num_train_episodes):
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action_params, num_rep = \
                actions.map_action_idx_to_hfo_action(
                    agent_pos=features.get_pos_tuple(),
                    has_ball=has_ball,
                    action_idx=action_idx)
            # Step (repeat the low-level action num_rep times):
            rep_counter_aux = 0
            while game_interface.in_game() and rep_counter_aux < num_rep:
                status, observation = game_interface.step(
                    hfo_action_params, has_ball)
                rep_counter_aux += 1
            reward = reward_funct(status)
            # Save metrics:
            if save_metrics:
                agent.save_visited_state(curr_state_id, action_idx)
                agent.cum_reward += reward
                aux_positions_names.add(features.get_position_name())
                action_name = actions.map_action_to_str(action_idx, has_ball)
                aux_actions_played.add(action_name)
            # Update environment features:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()
            agent.store_ep(state_idx=prev_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=curr_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        agent.learn()
        # print(':: Episode: {}; reward: {}; epsilon: {}; positions: {}; '
        #       'actions: {}'.format(ep, agent.cum_reward, agent.epsilon,
        #                            aux_positions_names, aux_actions_played))
        if save_metrics:
            agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters(episode=agent.train_eps,
                                      num_total_episodes=num_total_train_ep)
        # Game Reset
        game_interface.reset()
    agent.save_model()
    if save_metrics:
        actions_name = [actions.map_action_to_str(i, has_ball=True)
                        for i in range(agent.num_actions)]
        agent.export_metrics(training=True, actions_name=actions_name)
    return agent
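# A minimal sketch of a reward function with the signature the train/test loops
# above expect (reward_funct(status) -> number). It assumes the reward is
# derived only from the HFO status code; the basic_reward actually used in this
# repo may differ.
def sketch_basic_reward(status) -> int:
    if status == GOAL:
        return 1       # scoring is the only positive outcome
    elif status == IN_GAME:
        return 0       # episode still running, no terminal signal yet
    else:
        return -1      # captured by defense, out of bounds, out of time, ...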
parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int, default=6000)
args = parser.parse_args()
port = args.port

hfo = HFOEnvironment()
hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET, server_port=port,
                    config_dir=CONFIG_DIR)

for i in range(1):
    status = IN_GAME
    score = 0
    NUM_TEAMMATES = 0
    NUM_OPPONENTS = 1
    observation = hfo.getState()
    env = DiscreteFeaturesV2(num_team=NUM_TEAMMATES, num_op=NUM_OPPONENTS)
    went_to_the_corner = False
    ep = 0
    print("NEW GAME:")
    for i in range(4):
        print("New game:")
        print("Status: ", hfo.step())
        status = IN_GAME
        while status == IN_GAME:
            print("waiting observation")
            observation = hfo.getState()
            env.update_features(observation)
            pos_tuple = env.get_pos_tuple()
            print("waiting action")
            if ep < 10:
                hfo.act(DRIBBLE_TO, -0.7, 0)
def setUpClass(cls) -> None:
    super(TestHighLevelEnvironment, cls).setUpClass()
    cls.features_manager = DiscreteFeaturesV2(0, 1)
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeaturesV2, agent: QLearningAgentV6,
         actions: DiscreteActionsV5, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction
        between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
        agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the average reward
    """
    # Run test episodes with the greedy (exploit-only) policy:
    sum_score = 0
    for ep in range(num_episodes):
        # Check if server is still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action_params, num_rep = \
                actions.map_action_idx_to_hfo_action(
                    agent_pos=features.get_pos_tuple(),
                    has_ball=has_ball,
                    action_idx=action_idx)
            # Step:
            status, observation = execute_action(
                action_params=hfo_action_params,
                repetitions=num_rep,
                has_ball=has_ball,
                game_interface=game_interface)
            # Update features:
            reward = reward_funct(status)
            features.update_features(observation)
            sum_score += reward
        if status == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> AVR reward = ", sum_score / (ep + 1))
    return sum_score / num_episodes
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeaturesV2, agent: QLearningAgentV6,
          actions: DiscreteActionsV5, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: total number of episodes to train
    @param game_interface: game interface that manages the interaction
        between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
        agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (QLearningAgentV6) the agent
    """
    sum_score = 0
    sum_epsilons = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server is still up:
        # if game_interface.hfo.step() == SERVER_DOWN:
        #     raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            hfo_action_params, num_rep = \
                actions.map_action_idx_to_hfo_action(
                    agent_pos=features.get_pos_tuple(),
                    has_ball=has_ball,
                    action_idx=action_idx)
            # Step:
            status, observation = execute_action(
                action_params=hfo_action_params,
                repetitions=num_rep,
                has_ball=has_ball,
                game_interface=game_interface)
            # Update environment features:
            reward = reward_funct(status)
            sum_score += reward
            features.update_features(observation)
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        if status == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    print("<<TRAIN>> %Explorations={}% ".format(
        round(agent.counter_explorations /
              (agent.counter_exploitations + agent.counter_explorations),
              4) * 100))
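# A minimal sketch of the execute_action helper used by the V6 train/test loops
# above, assuming it simply repeats the mapped HFO action, mirroring the inline
# repetition loops of the earlier V5 versions. The real helper in this repo may
# differ (e.g. in how it handles episodes that end mid-repetition).
def sketch_execute_action(action_params, repetitions: int, has_ball: bool,
                          game_interface: HFOAttackingPlayer):
    status = IN_GAME
    observation = game_interface.get_state()
    rep = 0
    while game_interface.in_game() and rep < repetitions:
        status, observation = game_interface.step(action_params, has_ball)
        rep += 1
    return status, observation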
num_episodes = (num_train_ep + num_test_ep) * num_repetitions
# Directory
save_dir = args.save_dir or mkdir(num_episodes, num_op, extra_note="oldEps")

print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={};".format(agent_id, num_op, num_team, num_episodes))

# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(agent_id=agent_id,
                                   num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()

# Agent set-up
reward_function = basic_reward
features_manager = DiscreteFeaturesV2(num_team, num_op)
actions_manager = DiscreteActionsV5()
agent = QLearningAgentV6(num_states=features_manager.get_num_states(),
                         num_actions=actions_manager.get_num_actions(),
                         learning_rate=0.1,
                         discount_factor=0.9,
                         epsilon=0.8)

# Test once before any training:
av_reward = test(num_episodes=num_test_ep,
                 agent=agent,
                 game_interface=hfo_interface,
                 features=features_manager,
                 actions=actions_manager,
                 reward_funct=reward_function)

# Save metrics structures