def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interaction
    between the agent and the HFO server;
    @param features: features interface; extracts the agent's main features
    from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the win rate
    """
    # Run evaluation episodes with the current Q-table
    num_goals = 0
    for ep in range(num_episodes):
        # Check if the server is still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))
            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)
            # Update metrics:
            reward = reward_funct(status)
            num_goals += 1 if reward == 1 else 0
            if status == OUT_OF_TIME:
                if debug_counter < 5:
                    raise NoActionPlayedError(
                        "agent was only able to choose {}".format(
                            debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", (ep + 1))
    print("<<TEST>> AVR win rate = ", num_goals / (ep + 1))
    return num_goals / num_episodes
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: total number of episodes to train
    @param game_interface: game interface that manages the interaction
    between the agent and the HFO server;
    @param features: features interface; extracts the agent's main features
    from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: None; the agent is trained in place
    """
    sum_score = 0
    sum_epsilons = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if the server is still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # print("Agent playing {} for {}".format(action_name, num_rep))
            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)
            # Update reward and store the transition:
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
            if game_interface.get_game_status() == OUT_OF_TIME:
                if debug_counter < 5:
                    raise NoActionPlayedError(
                        "agent was only able to choose {}".format(
                            debug_counter))
        # Learn from the episode buffer and decay exploration:
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    print("<<TRAIN>> %Explorations={}% ".format(
        round((agent.counter_explorations /
               (agent.counter_exploitations + agent.counter_explorations))
              * 100, 2)))
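# --- Hypothetical sketch, not part of the source: one plausible shape for the
# basic_reward function that the scripts below pass as reward_funct. It assumes
# the standard status codes exported by the HFO python bindings (GOAL,
# OUT_OF_TIME, SERVER_DOWN, ...) and keeps the convention used by test(),
# which counts a goal whenever reward == 1. The real basic_reward may differ.
from hfo import GOAL


def basic_reward(status: int) -> int:
    # Reward of 1 only for the step that ends the episode with a goal.
    return 1 if status == GOAL else 0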
# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()
print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                num_team, num_episodes))

# Agent set-up
reward_function = basic_reward
features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
actions_manager = DiscreteActions1TeammateV1()

# Q Agent:
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions(),
                       learning_rate=0.1, discount_factor=0.9, epsilon=0.6,
                       final_epsilon=0.1)
agent.load_q_table(model_file)

# Save the original q_table before any further training
save_model(q_table=agent.q_table, file_name="original_model",
           directory=save_dir)

# Run an initial test before any further training:
av_reward = test(num_episodes=num_test_ep, agent=agent,
                 game_interface=hfo_interface, features=features_manager,
                 actions=actions_manager, reward_funct=reward_function)
# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()
print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                num_team, num_episodes))

# Agent set-up
reward_function = basic_reward
features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
actions_manager = DiscreteActions1TeammateV1()
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions(),
                       learning_rate=0.1, discount_factor=0.9, epsilon=0.8)

# Run an initial test before any training:
av_reward = test(num_episodes=num_test_ep, agent=agent,
                 game_interface=hfo_interface, features=features_manager,
                 actions=actions_manager, reward_funct=reward_function)

# Initialize metrics structures
trained_eps_list = [0]
avr_epsilons_list = [agent.epsilon]
avr_rewards_list = [av_reward]
qlearning_variation_list = [0]
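# --- Hypothetical continuation, not part of the source: a minimal sketch of
# how the metrics lists above might be filled by alternating train() and
# test() calls. num_retrains, num_train_ep and num_test_ep are assumed to be
# defined elsewhere in the script; qlearning_variation_list is left untouched
# because the metric it tracks is not defined in this excerpt.
for i in range(num_retrains):
    train(num_train_episodes=num_train_ep,
          num_total_train_ep=num_retrains * num_train_ep,
          game_interface=hfo_interface, features=features_manager,
          agent=agent, actions=actions_manager,
          reward_funct=reward_function)
    av_reward = test(num_episodes=num_test_ep, agent=agent,
                     game_interface=hfo_interface, features=features_manager,
                     actions=actions_manager, reward_funct=reward_function)
    trained_eps_list.append((i + 1) * num_train_ep)
    avr_epsilons_list.append(agent.epsilon)
    avr_rewards_list.append(av_reward)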
args = parser.parse_args()
num_team = args.num_teammates
num_op = args.num_opponents
num_episodes = args.num_ep
load_file = args.load_file

print("Q Table file: ", load_file)
print("Starting Test - num_opponents={}; num_teammates={}; "
      "num_episodes={};".format(num_op, num_team, num_episodes))

# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()

# Agent set-up
reward_function = basic_reward
features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
actions_manager = DiscreteActions1TeammateV1()
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions())
agent.load_q_table(load_file)

# Run the evaluation episodes:
av_reward = test(num_episodes=num_episodes, agent=agent,
                 game_interface=hfo_interface, features=features_manager,
                 actions=actions_manager, reward_funct=reward_function)
print("Av reward = {}".format(av_reward))