class Trainer:
    def __init__(self, num_opponents: int, num_teammates: int,
                 directory: str, step: int):
        # Game Interface:
        self.game_interface = GameInterface(num_opponents=num_opponents,
                                            num_teammates=num_teammates)
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = PlasticActions(num_team=num_teammates,
                                      features=self.features,
                                      game_interface=self.game_interface)
        # DQN:
        self.dqn = DQN.create(num_teammates=num_teammates,
                              num_features=self.features.get_num_features(),
                              num_actions=self.actions.get_num_actions(),
                              learning_rate=LEARNING_RATE)
        # Attributes:
        self.directory = directory
        self.step = step
        # Metrics:
        self.replay_buffer = list()
        self.saved_iterations = []
        self.losses = []

    def _restart_replay_buffer(self):
        self.replay_buffer = list()

    def _save_model(self, model_base: str, iteration: int, model: DQN = None,
                    save_as_main_model: bool = False):
        if not model:
            model = self.dqn
        if save_as_main_model:
            main_model_file = os.path.join(self.directory, model_base)
            model.save_model(file_name=main_model_file)
        # Save iteration:
        model_file = f"{model_base}.{len(self.saved_iterations)}"
        model_file = os.path.join(self.directory, model_file)
        # Model:
        model.save_model(file_name=model_file)
        self.saved_iterations.append(iteration)

    def _fit_batch(self, minibatch: List[Transition], verbose: int = 0,
                   epochs: int = 1) -> list:
        # Get current states from minibatch, then query NN model for Q values:
        current_states = np.array([transition.obs for transition in minibatch])
        current_qs_list = self.dqn.model.predict(current_states)
        # Get future states from minibatch, then query NN model for Q values:
        new_states = np.array([transition.new_obs for transition in minibatch])
        future_qs_list = self.dqn.model.predict(new_states)
        # Enumerate the batch and build the training targets:
        X = []
        y = []
        for idx, transition in enumerate(minibatch):
            # If not a terminal state, bootstrap from the maximum future Q
            # value; otherwise the target is the reward alone.
            if not transition.done:
                max_future_q = max(future_qs_list[idx])
                td = transition.reward + (DISCOUNT_FACTOR * max_future_q)
            else:
                td = transition.reward
            # Update the Q value of the action taken in this state:
            current_qs = current_qs_list[idx]
            current_qs[transition.act] = td
            X.append(transition.obs)
            y.append(current_qs)
        # Fit on all samples as one batch:
        loss = self.dqn.fit(np.array(X), np.array(y), epochs=epochs,
                            verbose=verbose, batch_size=MINIBATCH_SIZE)
        return loss

    def _load_learn_buffer(self, data_file: str):
        if os.path.isfile(data_file):
            with open(data_file, "rb") as fp:
                data: list = pickle.load(fp)
                self.replay_buffer += data
        else:
            raise ValueError(f"Can not find file {data_file}")

    def load_experience_from_dir(self, clean_learn_buffer: bool,
                                 verbose=False, starting_step: int = 0):
        if clean_learn_buffer:
            self._restart_replay_buffer()
        for prev_step in range(starting_step, self.step + 1):
            data_file = config.DQN_EXPERIENCE_BUFFER_FORMAT.format(
                step=prev_step)
            data_file = os.path.join(self.directory, data_file)
            self._load_learn_buffer(data_file)
        if verbose:
            print(f"\n[TRAIN : Step {self.step}] "
                  f"DATA LEN={len(self.replay_buffer)};\n")

    def train_model(self, verbose: bool = False):
        def divide_batches(data, n):
            # Split data into consecutive batches of size n:
            batches = list()
            for i in range(0, len(data), n):
                batches.append(data[i:i + n])
            return batches

        print(f"[train_model: {self.step}] Started")
        start_time = time.time()
        random.shuffle(self.replay_buffer)
        batches = divide_batches(self.replay_buffer, BATCH_SIZE)
        num_rep = len(batches)
        model_base = config.MODEL_FILE_FORMAT.format(step=self.step)
        for i, train_data in enumerate(batches):
            print(f"::: {i}/{num_rep}")
            # Train:
            loss = self._fit_batch(train_data, verbose=0, epochs=EPOCHS)
            self.losses.append(sum(loss) / len(loss))
        # Trained the minimum number of iterations:
        self._save_model(model_base=model_base, iteration=num_rep,
                         save_as_main_model=False)
        models = []
        new_losses = []
        for i in range(num_rep, num_rep + NUM_MIN_STABLE_TRAINING_EP):
            print(f"::: {i}/{num_rep + NUM_MIN_STABLE_TRAINING_EP}")
            # Train:
            train_data = random.sample(self.replay_buffer, BATCH_SIZE)
            loss = self._fit_batch(train_data, verbose=0, epochs=EPOCHS)
            avr_loss = sum(loss) / len(loss)
            # Save model:
            models.append(deepcopy(self.dqn))
            new_losses.append(avr_loss)
            self.losses.append(avr_loss)
        # Keep the new model with the lowest average loss as the main model:
        i = new_losses.index(min(new_losses))
        self._save_model(model_base=model_base, iteration=i + num_rep,
                         model=models[i], save_as_main_model=True)
        duration = (time.time() - start_time) // 60  # Minutes
        print(f"[train_model: {self.step}] Ended. Took {duration} minutes")
        return TrainMetrics(losses=self.losses,
                            saved_iterations=self.saved_iterations,
                            num_rep=num_rep + NUM_MIN_STABLE_TRAINING_EP,
                            epochs=EPOCHS,
                            batch_size=BATCH_SIZE,
                            min_batch_size=MINIBATCH_SIZE,
                            learning_rate=LEARNING_RATE,
                            discount_factor=DISCOUNT_FACTOR,
                            DQN_details=config.DQN_LAYERS)
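# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how Trainer might be driven for a single training
# step. The directory, player counts and step number below are hypothetical
# placeholders, and the experience files named by
# config.DQN_EXPERIENCE_BUFFER_FORMAT are assumed to already exist in
# `directory`.
def _example_training_step():
    trainer = Trainer(num_opponents=2, num_teammates=1,
                      directory="/tmp/dqn_models", step=0)
    # Aggregate every experience file produced up to the current step:
    trainer.load_experience_from_dir(clean_learn_buffer=True, verbose=True)
    # Fit the DQN and keep the checkpoint with the lowest average loss:
    train_metrics = trainer.train_model(verbose=True)
    print(train_metrics)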
class PlasticPlayer:
    def __init__(self, team_name: str, num_opponents: int,
                 num_teammates: int, models_dir: str, model_type: str,
                 memory_bounded: bool = False, history_len: int = 1,
                 port: int = 6000):
        self.team_name = team_name
        # Game Interface:
        self.game_interface = GameInterface(
            team_name=team_name,
            num_opponents=num_opponents,
            num_teammates=num_teammates,
            port=port)
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = PlasticActions(num_team=num_teammates,
                                      features=self.features,
                                      game_interface=self.game_interface)
        # Agent instance:
        self.policies = self.load_plastic_policies(models_dir,
                                                   config.TEAMS_NAMES)
        self.behaviour_dist = BehaviourDist(
            policies=self.policies,
            memory_bounded=memory_bounded,
            history_len=history_len,
            num_features=self.features.get_num_features(),
            model_type=model_type
        )
        # Connect to the rcssserver:
        self.game_interface.connect_to_server()

    @staticmethod
    def load_plastic_policies(dir_path: str, team_names: list):
        if not os.path.isdir(dir_path):
            print(f"[load_plastic_policies] Dir not found {dir_path};")
            raise NotADirectoryError(dir_path)
        policies = list()
        for team_name in team_names:
            if not os.path.isdir(os.path.join(dir_path, team_name)):
                print(f":: Can not find team {team_name}!\n".upper())
            else:
                policy = Policy.load(team_name=team_name, base_dir=dir_path)
                policies.append(policy)
                print(f":: Found Policy {team_name};")
        return policies

    def _get_reward(self, game_status: int) -> int:
        reward = 0
        if game_status == GOAL:
            kicker_unum = self.game_interface.get_last_player_to_touch_ball()
            # This agent (uniform number 11) scored the goal:
            if kicker_unum == 11:
                reward += 100
            reward += 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            reward -= 1000
        else:
            reward -= 1
        return reward

    def _play_episode(self, verbose: bool = False):
        # Auxiliary structures:
        guessed_teams = list()
        b_dist_buffer = list()
        # Metrics:
        touched_ball = False
        passed_ball = False
        while self.game_interface.in_game():
            if self.features.has_ball():
                touched_ball = True
            # Update environment features:
            features_array = self.features.get_features()
            # Act:
            legal_actions = self.actions.get_legal_actions()
            act = self.behaviour_dist.select_action(features_array,
                                                    legal_actions)
            self.actions.execute_action(act, verbose=verbose)
            # Store transition:
            # (obs, action, reward, new obs, done?)
            transition = Transition(
                obs=features_array,
                act=act,
                reward=self._get_reward(self.game_interface.get_game_status()),
                new_obs=self.features.get_features(),
                done=not self.game_interface.in_game()
            )
            # Update Beliefs:
            self.behaviour_dist.update_beliefs(transition)
            # Save metrics:
            predicted_policy = self.behaviour_dist.get_best_policy()
            guessed_teams.append(predicted_policy.team_name)
            b_dist_buffer.append(self.behaviour_dist.get_probabilities_dict())
            # Metrics:
            if "PASS" in self.actions.get_action_name(action_idx=act):
                passed_ball = True
        metrics = EpisodeMetrics(touched_ball=touched_ball,
                                 passed_ball=passed_ball)
        return guessed_teams, b_dist_buffer, metrics

    def play(self, num_episodes: int, verbose: bool = False):
        """
        @param num_episodes: number of episodes to play in this iteration
        @raise ServerDownError
        @return: Selected Teams, Game Results, Game Metrics
        """
        game_metrics = GameMetrics()
        game_metrics.set_correct_team(self.team_name)
        # Predicted team distributions:
        selected_teams = list()
        game_results = list()
        for ep in range(num_episodes):
            # Check if the server is still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return selected_teams, game_results, \
                    game_metrics.export_to_dict()
            # Update features:
            self.features.re_calculate_features(
                observation=self.game_interface.get_observation(),
                last_player_touch_ball_uniform_num=0)
            # Play episode:
            guessed_teams, b_dist_buffer, ep_metrics = \
                self._play_episode(verbose=verbose)
            goal: bool = self.game_interface.scored_goal()
            # Update auxiliary variables:
            game_metrics.add_episode_metrics(
                ep_metrics,
                goal=goal,
                guessed_teams=guessed_teams
            )
            game_results.append(1 if goal else 0)
            # Selected Teams: average the belief distribution over the episode:
            aux_dict = dict()
            for ep_dist in b_dist_buffer:
                for team, val in ep_dist.items():
                    try:
                        aux_dict[team] += val
                    except KeyError:
                        aux_dict[team] = val
            # Normalize values:
            num_ep = len(b_dist_buffer) or 1  # guard against empty episodes
            for team, val in aux_dict.items():
                aux_dict[team] = val / num_ep
            selected_teams.append(aux_dict)
            # Game Reset:
            self.game_interface.reset()
        metrics_dict = game_metrics.export_to_dict()
        metrics_dict["teams"] = [policy.team_name for policy in self.policies]
        metrics_dict["correct_team"] = self.team_name
        if verbose:
            print(f"[Game Metrics] {metrics_dict}")
        return selected_teams, game_results, metrics_dict
class Player:
    def __init__(self, team_name: str, num_opponents: int,
                 num_teammates: int, model_file: str, epsilon: float = 1,
                 port: int = 6000):
        # Game Interface:
        self.game_interface = GameInterface(team_name=team_name,
                                            num_opponents=num_opponents,
                                            num_teammates=num_teammates,
                                            port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = PlasticActions(num_team=num_teammates,
                                      features=self.features,
                                      game_interface=self.game_interface)
        # Agent instance:
        self.epsilon = epsilon
        self.dqn = DQN.load(load_file=model_file)

    def exploit_actions(self, state: np.ndarray,
                        verbose: bool = False) -> int:
        q_predict = self.dqn.predict(state)[0]
        # Mask illegal actions with a very low Q value:
        legal_actions = self.actions.get_legal_actions()
        for i in range(len(q_predict)):
            if i not in legal_actions:
                q_predict[i] = -2000
        # Greedy choice (break ties randomly):
        max_list = np.where(q_predict == q_predict.max())
        if len(max_list[0]) > 1:
            action = np.random.choice(max_list[0])
        else:
            action = np.argmax(q_predict)
        if verbose:
            print("Q values {} -> {}".format(q_predict, int(action)))
        return int(action)

    def explore_actions(self):
        legal_actions: range = self.actions.get_legal_actions()
        random_action = np.random.choice(legal_actions)
        return random_action

    def act(self, state: np.ndarray, metrics: GameMetrics,
            verbose: bool = False):
        if np.random.random() < self.epsilon:
            # Explore:
            if verbose:
                print("[ACT] Explored")
            metrics.inc_num_exploration_steps()
            return self.explore_actions()
        else:
            # Exploit:
            if verbose:
                print("[ACT] Exploit")
            metrics.inc_num_exploitation_steps()
            return self.exploit_actions(state)

    def get_reward(self, game_status: int) -> int:
        if game_status == GOAL:
            reward = 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            reward = -1000
        else:
            reward = -1
        return reward

    def play_episode(self, game_metrics: GameMetrics, verbose: bool = False):
        # Auxiliary structures:
        episode_buffer = list()
        # Metrics:
        touched_ball = False
        passed_ball = False
        scored_goal = False
        # Auxiliary:
        last_act = None
        while self.game_interface.in_game():
            if self.features.has_ball():
                touched_ball = True
            # Update environment features:
            features_array = self.features.get_features()
            # Act:
            act = self.act(features_array, metrics=game_metrics,
                           verbose=False)
            if verbose:
                # Only log when the selected action changes:
                if act == last_act:
                    log_action = False
                else:
                    print(f"{self.features.team_ball_possession}; "
                          f"{self.features.has_ball()}")
                    log_action = True
                self.actions.execute_action(act, verbose=log_action)
                last_act = act
            else:
                self.actions.execute_action(act, verbose=False)
            # Store transition:
            # (obs, action, reward, new obs, done?)
            transition = Transition(
                obs=features_array,
                act=act,
                reward=self.get_reward(self.game_interface.get_game_status()),
                new_obs=self.features.get_features(),
                done=not self.game_interface.in_game())
            episode_buffer.append(transition)
            # Metrics:
            if "PASS" in self.actions.get_action_name(action_idx=act):
                passed_ball = True
        if self.game_interface.scored_goal():
            uniform = self.game_interface.hfo.getUnum()
            if self.game_interface.last_player_to_touch_ball == uniform:
                scored_goal = True
        metrics = EpisodeMetrics(touched_ball=touched_ball,
                                 passed_ball=passed_ball,
                                 scored_goal=scored_goal)
        return episode_buffer, metrics

    def play(self, num_episodes: int, verbose: bool = False):
        """
        @param num_episodes: number of episodes to play in this iteration
        @raise ServerDownError
        @return: Experience Buffer, Game Metrics
        """
        experience_buffer = LearnBuffer()
        game_metrics = GameMetrics()
        for ep in range(num_episodes):
            # Check if the server is still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return experience_buffer, game_metrics.export_to_dict()
            # Update features:
            self.features.re_calculate_features(
                observation=self.game_interface.get_observation(),
                last_player_touch_ball_uniform_num=0)
            # Play episode:
            ep_buffer, ep_metrics = self.play_episode(game_metrics, verbose)
            # Save episode:
            experience_buffer.save_episode(ep_buffer)
            # Update auxiliary variables:
            game_metrics.add_episode_metrics(
                ep_metrics, goal=self.game_interface.scored_goal())
            # Game Reset:
            self.game_interface.reset()
        return experience_buffer, game_metrics.export_to_dict()
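# --- Usage sketch (illustrative, not part of the original module) ---
# Illustrates collecting experience with an epsilon-greedy Player so the
# resulting buffer can later be fed to the Trainer. The model file path,
# epsilon value and episode count are hypothetical placeholders.
def _example_experience_collection():
    player = Player(team_name="base", num_opponents=2, num_teammates=1,
                    model_file="/tmp/dqn_models/model", epsilon=0.3,
                    port=6000)
    experience_buffer, metrics = player.play(num_episodes=50, verbose=False)
    # experience_buffer aggregates the Transitions of every episode;
    # metrics summarises exploration/exploitation steps and goals scored.
    print(metrics)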