def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteHighLevelFeatures, agent: QLearningAgent,
         actions: ActionManager):
    # Run the evaluation episodes with a greedy (exploit-only) policy
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_function(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [actions.map_action_to_str(i)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=False, actions_name=actions_name)
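# NOTE: `reward_function` used above is bound to `simple_reward` in the
# launcher scripts further down, but its body is not part of this excerpt.
# The function below is only a minimal sketch of what such a terminal-status
# reward could look like; the concrete values are an assumption, not the
# original implementation.
def simple_reward(status) -> int:
    # Reward a goal, give nothing while the episode is still running,
    # and penalise every other terminal outcome (capture, out of bounds, ...).
    if status == GOAL:
        return 1000
    elif status == IN_GAME:
        return 0
    else:
        return -1000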
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: DiscreteHighLevelFeatures, agent: QLearningAgent,
          actions: ActionManager):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action = actions.map_action(action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_function(status)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward

            # Compute the next state from the new observation:
            prev_state_id = curr_state_id
            curr_state_id = features.get_state_index(observation)

            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [actions.map_action_to_str(i)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=True, actions_name=actions_name)
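# NOTE: `QLearningAgent.learn` is not shown in this excerpt. Below is a
# minimal sketch of the tabular Q-Learning update it presumably applies;
# the attribute names (`q_table`, `learning_rate`, `discount_factor`) are
# assumptions inferred from the constructor arguments used in the launchers.
def learn_sketch(agent, prev_state_id, action_idx, reward, status,
                 curr_state_id):
    # Bootstrap from the best next action unless the episode terminated.
    if status != IN_GAME:
        next_value = 0
    else:
        next_value = max(agent.q_table[curr_state_id])
    td_target = reward + agent.discount_factor * next_value
    td_error = td_target - agent.q_table[prev_state_id][action_idx]
    agent.q_table[prev_state_id][action_idx] += agent.learning_rate * td_error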
parser.add_argument('--numTeammates', type=int, default=0)
parser.add_argument('--numOpponents', type=int, default=1)
parser.add_argument('--numEpisodes', type=int, default=1)
parser.add_argument('--saveFile', type=str, default="q_agent.model")
args = parser.parse_args()

num_teammates = args.numTeammates
num_opponents = args.numOpponents
n_games = args.numEpisodes
save_file = args.saveFile

lr = 0.01
epsilon = 1
discount_factor = 0.9

# Useful Instances:
hfo = HFOEnvironment()
hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET, server_port=6000)
env = DiscreteHighLevelFeatures(hfo.getState(), num_teammates, num_opponents)
actions = Action()
agent = QLearner(num_states=env.get_num_states(),
                 num_actions=actions.get_num_actions(),
                 epsilon=epsilon, learning_rate=lr,
                 discount_factor=discount_factor, save_file=save_file)

# Saving lists
scores = []
eps_history = []

for i in range(n_games):
    print("\n<< {}/{} Game >> eps={}".format(i, n_games, agent.epsilon))
    game_status = IN_GAME
    score = 0
    while game_status == IN_GAME:
train_mode = args.train_mode
num_train_ep = args.num_train_ep
num_test_ep = args.num_test_ep
num_episodes = num_train_ep + num_test_ep

print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={};".format(agent_id, num_op, num_team, num_episodes))

# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(agent_id=agent_id, num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()

# Agent set-up
reward_function = simple_reward
features_manager = DiscreteHighLevelFeatures(num_team, num_op)
actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions(),
                       learning_rate=0.1, discount_factor=0.99, epsilon=1.0,
                       epsilon_dec=0.9992)

# Run training using Q-Learning
if train_mode == "train_only":
    print('\n=== Train Mode for {} episodes:'.format(num_train_ep))
    train(num_episodes=num_train_ep, game_interface=hfo_interface,
          features=features_manager, agent=agent,
          actions=actions_manager)
saving_file = args.save_file

print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={}; saveFile={};".format(agent_id, num_op, num_team,
                                             num_episodes, saving_file))

# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(agent_id=agent_id, num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()

# Reward Function
reward_function = simple_reward

# Get number of features and actions
features_manager = DiscreteHighLevelFeatures(num_team, num_op)
actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])

# Initialize a Q-Learning Agent
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions(),
                       learning_rate=0.1, discount_factor=0.99, epsilon=1.0,
                       num_games=num_episodes, save_file=saving_file)

# Run training using Q-Learning
for i in range(num_episodes):
    print('\n=== Episode {}/{}:'.format(i, num_episodes))
    agent.reset(i)
args = parser.parse_args()
port = args.port

hfo = HFOEnvironment()
hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET, server_port=port,
                    config_dir=CONFIG_DIR)
print("Connected")

for i in range(1):
    status = IN_GAME
    score = 0
    NUM_TEAMMATES = 0
    NUM_OPPONENTS = 2
    observation = hfo.getState()
    env = DiscreteHighLevelFeatures(num_team=NUM_TEAMMATES,
                                    num_op=NUM_OPPONENTS)
    ep_counter = 0
    while status == IN_GAME:
        hfo.act(DRIBBLE)
        # check game status:
        ep_counter += 1
        status = hfo.step()
        env.get_features(hfo.getState())
        print("OP: ", env.agent.proximity_op)
    if status == SERVER_DOWN:
        hfo.act(QUIT)
        break
    """
    if bool(hfo_env.has_ball(observation)) is False:
        hfo.act(MOVE)