def __init__(self, num_opponents: int, num_teammates: int, model_file: str,
             epsilon: int = 1, port: int = 6000):
    """Wire up the game, feature and action interfaces and load the model.

    @param num_opponents: number of opponents, forwarded to the game and
        features interfaces
    @param num_teammates: number of teammates, forwarded to the game,
        features and actions interfaces
    @param model_file: file the DQN is loaded from
    @param epsilon: value stored on the instance as ``self.epsilon``
    @param port: port forwarded to the game interface
    """
    # Game interface: build it, expose it, then connect to the server.
    interface = HFOAttackingPlayer(
        num_opponents=num_opponents,
        num_teammates=num_teammates,
        port=port,
    )
    self.game_interface = interface
    interface.connect_to_server()

    # Features interface.
    features = PlasticFeatures(num_op=num_opponents, num_team=num_teammates)
    self.features = features

    # Actions interface, built on top of the two interfaces above.
    self.actions = Actions(
        num_team=num_teammates,
        features=features,
        game_interface=interface,
    )

    # Agent state.
    self.epsilon = epsilon
    self.dqn = DQN.load(load_file=model_file)
def __init__(self, num_opponents: int, num_teammates: int, model_file: str):
    """Build the game, feature and action interfaces, the DQN and the buffer.

    @param num_opponents: number of opponents, forwarded to the game and
        features interfaces
    @param num_teammates: number of teammates, forwarded to the game,
        features and actions interfaces
    @param model_file: file the DQN is loaded from
    """
    # Game interface (this variant does not connect to the server here).
    interface = HFOAttackingPlayer(
        num_opponents=num_opponents,
        num_teammates=num_teammates,
    )
    self.game_interface = interface

    # Features interface.
    features = PlasticFeatures(num_op=num_opponents, num_team=num_teammates)
    self.features = features

    # Actions interface, built on top of the two interfaces above.
    self.actions = Actions(
        num_team=num_teammates,
        features=features,
        game_interface=interface,
    )

    # Trained network.
    self.dqn = DQN.load(load_file=model_file)

    # Replay buffer: bounded deque, so the oldest entries are discarded
    # once REPLAY_MEMORY_SIZE items are stored.
    self.replay_buffer = deque(maxlen=REPLAY_MEMORY_SIZE)
class Player:
    """Agent that plays episodes by always exploiting its loaded DQN.

    Per-game statistics are accumulated in ``self.metrics`` and exported as
    a dictionary by :meth:`play`.
    """

    def __init__(self, num_opponents: int, num_teammates: int,
                 model_file: str, epsilon: float = 1, port: int = 6000):
        """Connect to the game server and load the trained model.

        @param num_opponents: number of opponents, forwarded to the game
            and features interfaces
        @param num_teammates: number of teammates, forwarded to the game,
            features and actions interfaces
        @param model_file: file the DQN is loaded from
        @param epsilon: stored as ``self.epsilon``; no method of this class
            reads it
        @param port: port forwarded to the game interface
        """
        # Game Interface:
        self.game_interface = HFOAttackingPlayer(
            num_opponents=num_opponents,
            num_teammates=num_teammates,
            port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions(num_team=num_teammates,
                               features=self.features,
                               game_interface=self.game_interface)
        # Agent instance:
        self.epsilon = epsilon
        self.dqn = DQN.load(load_file=model_file)
        # Metrics:
        self.metrics = GameMetrics()

    def set_starting_game_conditions(self, start_pos: tuple = None,
                                     starts_fixed_position: bool = True,
                                     verbose: bool = False):
        """Set starting game conditions (e.g. move to the initial position).

        @param start_pos: position to move to; when falsy, one of the
            vertices returned by ``get_vertices_around_ball`` is picked at
            random
        @param starts_fixed_position: when False the player is not moved
        @param verbose: print the chosen starting position
        """
        if starts_fixed_position:
            if not start_pos:
                ball_pos: list = list(self.features.get_ball_coord())
                starting_corners = get_vertices_around_ball(ball_pos)
                start_pos = random.choice(starting_corners)
            self.actions.move_to_pos(start_pos)
            if verbose:
                print(f"[PLAYER: GAME SET UP] Initial pos= {start_pos}")
        elif verbose:
            # Start in current position
            print("[START GAME] Initial pos= RANDOM")
        # Informs the other players that it is ready to start:
        self.game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def exploit_actions(self, state: np.ndarray,
                        verbose: bool = False) -> int:
        """Return the action with the highest predicted Q value.

        Ties between several maximal Q values are broken uniformly at
        random.

        @param state: features array handed to ``self.dqn.predict``
        @param verbose: print the Q values and the selected action
        @return: index of the selected action
        """
        q_predict = self.dqn.predict(state)
        # Flattened indices of every maximal entry. NOTE(review): the shape
        # returned by dqn.predict is not visible here; flattened indexing
        # keeps the tie-break consistent with np.argmax below even if the
        # output is batched, e.g. (1, n), where first-axis indices would
        # all be 0.
        best_actions = np.flatnonzero(q_predict == q_predict.max())
        if len(best_actions) > 1:
            action = np.random.choice(best_actions)
        else:
            action = np.argmax(q_predict)
        if verbose:
            print("Q values {} -> {}".format(q_predict, int(action)))
        return int(action)

    def play(self, num_episodes: int, starts_fixed_position: bool = True):
        """Play ``num_episodes`` episodes and collect the game metrics.

        ``ServerDownError`` is handled internally: if the server check
        fails, the metrics gathered so far are exported and returned
        immediately.

        @param num_episodes: number of episodes to play in this iteration
        @param starts_fixed_position: forwarded to
            ``set_starting_game_conditions`` at the start of each episode
        @return: metrics dictionary from ``self.metrics.export_to_dict``
        """
        self.metrics.restart()
        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return self.metrics.export_to_dict(num_episodes)
            # Update features:
            self.features.update_features(
                observation=self.game_interface.get_observation())
            # Go to origin position:
            self.set_starting_game_conditions(
                starts_fixed_position=starts_fixed_position)

            # Per-episode flags; once set they stay set until the episode
            # ends.
            touched_ball = False
            passed_ball = False
            while self.game_interface.in_game():
                if self.features.has_ball():
                    touched_ball = True
                # Update environment features:
                features_array = self.features.get_features()
                # Act:
                act = self.exploit_actions(features_array, verbose=False)
                _status, correct_action, passed_ball_succ = \
                    self.actions.execute_action(act)
                # Metrics:
                if passed_ball_succ is True:
                    passed_ball = True
                if correct_action:
                    self.metrics.inc_num_correct_actions()
                else:
                    self.metrics.inc_num_wrong_actions()

            # Update auxiliary variables:
            self.metrics.inc_num_ep()
            if self.game_interface.scored_goal():
                self.metrics.inc_num_wins()
            if touched_ball:
                self.metrics.inc_num_games_touched_ball()
            if passed_ball:
                self.metrics.inc_num_games_passed_ball()
            # Game Reset
            self.game_interface.reset()
        return self.metrics.export_to_dict(num_episodes)
class Player:
    """Epsilon-greedy DQN agent that plays episodes and records transitions.

    :meth:`play` returns the collected experience together with the
    exported game metrics.
    """

    def __init__(self, num_opponents: int, num_teammates: int,
                 model_file: str, epsilon: float = 1, port: int = 6000):
        """Connect to the game server and load the trained model.

        @param num_opponents: number of opponents, forwarded to the game
            and features interfaces
        @param num_teammates: number of teammates, forwarded to the game,
            features and actions interfaces
        @param model_file: file the DQN is loaded from
        @param epsilon: exploration threshold; :meth:`act` explores when a
            uniform sample from [0, 1) is below it
        @param port: port forwarded to the game interface
        """
        # Game Interface:
        self.game_interface = HFOAttackingPlayer(
            num_opponents=num_opponents,
            num_teammates=num_teammates,
            port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions(num_team=num_teammates,
                               features=self.features,
                               game_interface=self.game_interface)
        # Agent instance:
        self.epsilon = epsilon
        self.dqn = DQN.load(load_file=model_file)

    def exploit_actions(self, state: np.ndarray,
                        verbose: bool = False) -> int:
        """Return the action with the highest predicted Q value.

        Ties between several maximal Q values are broken uniformly at
        random.

        @param state: features array handed to ``self.dqn.predict``
        @param verbose: print the Q values and the selected action
        @return: index of the selected action
        """
        q_predict = self.dqn.predict(state)
        # Flattened indices of every maximal entry. NOTE(review): the shape
        # returned by dqn.predict is not visible here; flattened indexing
        # keeps the tie-break consistent with np.argmax below even if the
        # output is batched, e.g. (1, n), where first-axis indices would
        # all be 0.
        best_actions = np.flatnonzero(q_predict == q_predict.max())
        if len(best_actions) > 1:
            action = np.random.choice(best_actions)
        else:
            action = np.argmax(q_predict)
        if verbose:
            print("Q values {} -> {}".format(q_predict, int(action)))
        return int(action)

    def explore_actions(self) -> int:
        """Return an action index drawn uniformly from the action space."""
        return int(np.random.randint(0, self.actions.num_actions))

    def act(self, state: np.ndarray, verbose: bool = False) -> int:
        """Choose an action epsilon-greedily.

        @param state: features array used when exploiting
        @param verbose: print whether the agent explored or exploited
        @return: index of the selected action
        """
        if np.random.random() < self.epsilon:  # Explore
            if verbose:
                print("[ACT] Explored")
            return self.explore_actions()
        # Exploit
        if verbose:
            print("[ACT] Exploit")
        return self.exploit_actions(state)

    def get_reward(self, game_status: int, correct_action: bool) -> int:
        """Map a step outcome to a reward.

        @param game_status: status returned by ``execute_action``
        @param correct_action: whether the executed action was correct
        @return: +1000 for a goal, -1000 when the episode is lost
            (captured, out of bounds, out of time); otherwise +1 for a
            correct action and -5 for a wrong one
        """
        if game_status == GOAL:
            return 1000
        if game_status in (CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME):
            return -1000
        return 1 if correct_action else -5

    def play_episode(self):
        """Play one episode, recording each transition and episode metrics.

        @return: tuple ``(episode_buffer, metrics)`` with the list of
            ``Transition`` objects and the ``EpisodeMetrics`` of the episode
        """
        # auxiliary structures:
        episode_buffer = []
        # metrics:
        touched_ball = False
        passed_ball = False
        num_wrong_actions = 0
        num_correct_actions = 0
        while self.game_interface.in_game():
            # Episode-level flag: once the agent has had the ball it stays
            # set, instead of being overwritten by later steps.
            if self.features.has_ball():
                touched_ball = True
            # Update environment features:
            features_array = self.features.get_features()
            # Act:
            act = self.act(features_array, verbose=False)
            status, correct_action, passed_ball_succ = \
                self.actions.execute_action(act)
            # Store transition:
            # (obs, action, reward, new obs, done?)
            transition = Transition(
                obs=features_array,
                act=act,
                reward=self.get_reward(status, correct_action),
                new_obs=self.features.get_features(),
                done=not self.game_interface.in_game(),
                correct_action=correct_action)
            episode_buffer.append(transition)
            # Metrics (passed_ball is sticky for the same reason as
            # touched_ball):
            if passed_ball_succ:
                passed_ball = True
            if correct_action:
                num_correct_actions += 1
            else:
                num_wrong_actions += 1
        metrics = EpisodeMetrics(touched_ball=touched_ball,
                                 passed_ball=passed_ball,
                                 num_wrong_actions=num_wrong_actions,
                                 num_correct_actions=num_correct_actions)
        return episode_buffer, metrics

    def play(self, num_episodes: int, starts_fixed_position: bool = True):
        """Play ``num_episodes`` episodes, collecting experience and metrics.

        ``ServerDownError`` is handled internally: if the server check
        fails, whatever was collected so far is returned immediately.

        @param num_episodes: number of episodes to play in this iteration
        @param starts_fixed_position: accepted for interface compatibility;
            not read by this method
        @return: tuple ``(experience_buffer, metrics_dict)``
        """
        experience_buffer = LearnBuffer()
        game_metrics = GameMetrics()
        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return experience_buffer, game_metrics.export_to_dict()
            # Update features:
            self.features.update_features(
                observation=self.game_interface.get_observation())
            # Play episode:
            ep_buffer, ep_metrics = self.play_episode()
            # Save episode:
            experience_buffer.save_episode(ep_buffer, verbose=True)
            # Update auxiliary variables:
            game_metrics.add_episode_metrics(
                ep_metrics, goal=self.game_interface.scored_goal())
            # Game Reset
            self.game_interface.reset()
        return experience_buffer, game_metrics.export_to_dict()