Code Example #1
 def __init__(self, team_name: str, num_opponents: int, num_teammates: int,
              models_dir: str, model_type: str, memory_bounded: bool = False,
              history_len: int = 1, port: int = 6000):
     self.team_name = team_name
     # Game Interface:
     self.game_interface = GameInterface(
         team_name=team_name,
         num_opponents=num_opponents,
         num_teammates=num_teammates,
         port=port)
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = PlasticActions(num_team=num_teammates,
                                   features=self.features,
                                   game_interface=self.game_interface)
     # Agent instance:
     self.policies = self.load_plastic_policies(models_dir,
                                                config.TEAMS_NAMES)
     self.behaviour_dist = BehaviourDist(
         policies=self.policies,
         memory_bounded=memory_bounded,
         history_len=history_len,
         num_features=self.features.get_num_features(),
         model_type=model_type
     )
     # Connect to the rcssserver (RoboCup soccer server):
     self.game_interface.connect_to_server()
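
Note that this constructor (the PlasticPlayer class in Code Example #5 shows it in context) defers game_interface.connect_to_server() until after the policy library and belief distribution are built, whereas the constructor in Code Example #2 connects immediately after creating the GameInterface. A minimal instantiation sketch, with hypothetical placeholder values (team name, directory, and model type are not from the source):

player = PlasticPlayer(team_name="helios", num_opponents=2, num_teammates=1,
                       models_dir="models/", model_type="dqn")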
Code Example #2
 def __init__(self,
              team_name: str,
              num_opponents: int,
              num_teammates: int,
              model_file: str,
              epsilon: float = 1.0,
              port: int = 6000):
     # Game Interface:
     self.game_interface = GameInterface(team_name=team_name,
                                         num_opponents=num_opponents,
                                         num_teammates=num_teammates,
                                         port=port)
     self.game_interface.connect_to_server()
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = PlasticActions(num_team=num_teammates,
                                   features=self.features,
                                   game_interface=self.game_interface)
     # Agent instance:
     self.epsilon = epsilon
     self.dqn = DQN.load(load_file=model_file)
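
Since act() in Code Example #6 explores with probability epsilon, the default of 1.0 makes a freshly constructed agent purely exploratory; pass a small value (or 0) for greedy evaluation. A minimal instantiation sketch for the Player class (shown in full in Code Example #6), with hypothetical placeholder values:

player = Player(team_name="base", num_opponents=2, num_teammates=1,
                model_file="models/dqn.model", epsilon=0)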
Code Example #3
 def __init__(self, num_opponents: int, num_teammates: int, directory: str,
              step: int):
     # Game Interface:
     self.game_interface = GameInterface(num_opponents=num_opponents,
                                         num_teammates=num_teammates)
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = PlasticActions(num_team=num_teammates,
                                   features=self.features,
                                   game_interface=self.game_interface)
     # DQN:
     self.dqn = DQN.create(num_teammates=num_teammates,
                           num_features=self.features.get_num_features(),
                           num_actions=self.actions.get_num_actions(),
                           learning_rate=LEARNING_RATE)
     # Attributes:
     self.directory = directory
     self.step = step
     # Metrics:
     self.replay_buffer = list()
     self.saved_iterations = []
     self.losses = []
Code Example #4
# Standard-library and third-party imports used below; project-local names
# (DQN, Transition, TrainMetrics, config, LEARNING_RATE, DISCOUNT_FACTOR,
# BATCH_SIZE, MINIBATCH_SIZE, EPOCHS, NUM_MIN_STABLE_TRAINING_EP, ...) are
# assumed to be imported from the repository's own modules.
import os
import pickle
import random
import time
from copy import deepcopy
from typing import List

import numpy as np


class Trainer:
    def __init__(self, num_opponents: int, num_teammates: int, directory: str,
                 step: int):
        # Game Interface:
        self.game_interface = GameInterface(num_opponents=num_opponents,
                                            num_teammates=num_teammates)
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = PlasticActions(num_team=num_teammates,
                                      features=self.features,
                                      game_interface=self.game_interface)
        # DQN:
        self.dqn = DQN.create(num_teammates=num_teammates,
                              num_features=self.features.get_num_features(),
                              num_actions=self.actions.get_num_actions(),
                              learning_rate=LEARNING_RATE)
        # Attributes:
        self.directory = directory
        self.step = step
        # Metrics:
        self.replay_buffer = list()
        self.saved_iterations = []
        self.losses = []

    def _restart_replay_buffer(self):
        self.replay_buffer = list()

    def _save_model(self,
                    model_base: str,
                    iteration: int,
                    model: DQN = None,
                    save_as_main_model: bool = False):
        if model is None:
            model = self.dqn

        if save_as_main_model:
            main_model_file = model_base
            main_model_file = os.path.join(self.directory, main_model_file)
            model.save_model(file_name=main_model_file)
        # Save iteration:
        model_file = f"{model_base}.{len(self.saved_iterations)}"
        model_file = os.path.join(self.directory, model_file)
        # Model:
        model.save_model(file_name=model_file)
        self.saved_iterations.append(iteration)
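
    # Note: checkpoint files are suffixed with how many saves have happened
    # so far (len(self.saved_iterations)), while the training-iteration
    # index itself is only recorded in the saved_iterations list.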

    def _fit_batch(self,
                   minibatch: List[Transition],
                   verbose: int = 0,
                   epochs: int = 1) -> list:
        # Get current states from minibatch, then query NN model for Q values
        current_states = np.array([transition.obs for transition in minibatch])
        current_qs_list = self.dqn.model.predict(current_states)

        # Get future states from minibatch, then query NN model for Q values
        new_states = np.array([transition.new_obs for transition in minibatch])
        future_qs_list = self.dqn.model.predict(new_states)

        # Now we need to enumerate our batches
        X = []
        y = []
        for idx, transition in enumerate(minibatch):
            # If not a terminal state, bootstrap the target from the max
            # future Q value; otherwise the target is just the reward.
            if not transition.done:
                max_future_q = max(future_qs_list[idx])
                td = transition.reward + (DISCOUNT_FACTOR * max_future_q)
            else:
                td = transition.reward

            # Update Q value for given state
            current_qs = current_qs_list[idx]
            current_qs[transition.act] = td
            # current_qs[action] = current_qs[action] + self.learning_rate * td

            X.append(transition.obs)
            y.append(current_qs)

        # Fit on all samples as one batch, log only on terminal state
        loss = self.dqn.fit(np.array(X),
                            np.array(y),
                            epochs=epochs,
                            verbose=verbose,
                            batch_size=MINIBATCH_SIZE)
        return loss
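
    # Note: _fit_batch builds the standard DQN regression target
    #     y[act] = reward + DISCOUNT_FACTOR * max(Q(new_obs, .))
    # (y[act] = reward at terminal transitions); all other actions keep
    # the network's current predictions as targets, so only the taken
    # action receives a learning signal.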

    def _load_learn_buffer(self, data_file: str):
        if os.path.isfile(data_file):
            with open(data_file, "rb") as fp:
                data: list = pickle.load(fp)
            self.replay_buffer += data
        else:
            ValueError(f"Can not find file {data_file}")

    def load_experience_from_dir(self,
                                 clean_learn_buffer: bool,
                                 verbose=False,
                                 starting_step: int = 0):
        if clean_learn_buffer:
            self._restart_replay_buffer()
        for prev_step in range(starting_step, self.step + 1):
            data_file = config.DQN_EXPERIENCE_BUFFER_FORMAT.format(
                step=prev_step)
            data_file = os.path.join(self.directory, data_file)
            self._load_learn_buffer(data_file)
        if verbose:
            print(f"\n[TRAIN : Step {self.step}] "
                  f"DATA LEN={len(self.replay_buffer)};\n")

    def train_model(self, verbose: bool = False):
        def divide_batches(l, n):
            # Split l into consecutive chunks of size n:
            batches = list()
            for i in range(0, len(l), n):
                batches.append(l[i:i + n])
            return batches

        print(f"[train_model: {self.step}] Started")
        start_time = time.time()

        random.shuffle(self.replay_buffer)
        batches = divide_batches(self.replay_buffer, BATCH_SIZE)
        num_rep = len(batches)
        model_base = config.MODEL_FILE_FORMAT.format(step=self.step)
        for i, train_data in enumerate(batches):
            print(f"::: {i}/{num_rep}")
            # Train:
            loss = self._fit_batch(train_data, verbose=0, epochs=EPOCHS)
            self.losses.append(sum(loss) / len(loss))

        # Checkpoint after the minimum number of training iterations:
        self._save_model(model_base=model_base,
                         iteration=num_rep,
                         save_as_main_model=False)

        models = []
        new_losses = []
        for i in range(num_rep, num_rep + NUM_MIN_STABLE_TRAINING_EP):
            print(f"::: {i}/{num_rep + NUM_MIN_STABLE_TRAINING_EP}")
            # Train:
            train_data = random.sample(self.replay_buffer, BATCH_SIZE)
            loss = self._fit_batch(train_data, verbose=0, epochs=EPOCHS)
            avr_loss = sum(loss) / len(loss)
            # Save model:
            models.append(deepcopy(self.dqn))
            new_losses.append(avr_loss)
            self.losses.append(avr_loss)

        # Save the candidate model with the lowest loss as the main model:
        i = new_losses.index(min(new_losses))
        self._save_model(model_base=model_base,
                         iteration=i + num_rep,
                         model=models[i],
                         save_as_main_model=True)

        duration = (time.time() - start_time) // 60  # Minutes
        print(f"[train_model: {self.step}] Ended. Took {duration} minutes")

        return TrainMetrics(losses=self.losses,
                            saved_iterations=self.saved_iterations,
                            num_rep=num_rep + NUM_MIN_STABLE_TRAINING_EP,
                            epochs=EPOCHS,
                            batch_size=BATCH_SIZE,
                            min_batch_size=MINIBATCH_SIZE,
                            learning_rate=LEARNING_RATE,
                            discount_factor=DISCOUNT_FACTOR,
                            DQN_details=config.DQN_LAYERS)
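
A hypothetical driver for the Trainer (the directory, step index, and episode counts below are placeholder values, not ones from the source):

trainer = Trainer(num_opponents=2, num_teammates=1,
                  directory="experiments/run_0", step=5)
trainer.load_experience_from_dir(clean_learn_buffer=True, verbose=True)
metrics = trainer.train_model(verbose=True)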
Code Example #5
# Standard-library import used below; project-local names (GameInterface,
# PlasticFeatures, PlasticActions, BehaviourDist, Policy, Transition,
# EpisodeMetrics, GameMetrics, ServerDownError, config, GOAL, ...) are
# assumed to be imported from the repository's own modules.
import os


class PlasticPlayer:
    def __init__(self, team_name: str, num_opponents: int, num_teammates: int,
                 models_dir: str, model_type: str, memory_bounded: bool = False,
                 history_len: int = 1, port: int = 6000):
        self.team_name = team_name
        # Game Interface:
        self.game_interface = GameInterface(
            team_name=team_name,
            num_opponents=num_opponents,
            num_teammates=num_teammates,
            port=port)
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = PlasticActions(num_team=num_teammates,
                                      features=self.features,
                                      game_interface=self.game_interface)
        # Agent instance:
        self.policies = self.load_plastic_policies(models_dir,
                                                   config.TEAMS_NAMES)
        self.behaviour_dist = BehaviourDist(
            policies=self.policies,
            memory_bounded=memory_bounded,
            history_len=history_len,
            num_features=self.features.get_num_features(),
            model_type=model_type
        )
        # Connect to the rcssserver (RoboCup soccer server):
        self.game_interface.connect_to_server()
    
    @staticmethod
    def load_plastic_policies(dir_path: str, team_names: list):
        if not os.path.isdir(dir_path):
            print(f"[load_plastic_models] Dir not found {dir_path};")
            raise NotADirectoryError(dir_path)
        policies = list()
        for team_name in team_names:
            if not os.path.isdir(os.path.join(dir_path, team_name)):
                print(f":: Can not find team {team_name}!\n".upper())
            else:
                policy = Policy.load(team_name=team_name, base_dir=dir_path)
                policies.append(policy)
                print(f":: Found Policy {team_name};")
        return policies
    
    def _get_reward(self, game_status: int) -> int:
        reward = 0
        if game_status == GOAL:
            kicker_unum = self.game_interface.get_last_player_to_touch_ball()
            # Bonus when the scorer matches the hard-coded uniform number 11:
            if kicker_unum == 11:
                reward += 100
            reward += 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            reward -= 1000
        else:
            reward -= 1
        return reward
    
    def _play_episode(self, verbose: bool = False):
        # auxiliary structures:
        guessed_teams = list()
        b_dist_buffer = list()
        # metrics:
        touched_ball = False
        passed_ball = False
        while self.game_interface.in_game():
            if self.features.has_ball():
                touched_ball = True
            # Update environment features:
            features_array = self.features.get_features()
            # Act:
            legal_actions = self.actions.get_legal_actions()
            act = self.behaviour_dist.select_action(features_array,
                                                    legal_actions)
            self.actions.execute_action(act, verbose=verbose)

            # Store transition:
            # (obs, action, reward, new obs, done?)
            transition = Transition(
                obs=features_array,
                act=act,
                reward=self._get_reward(self.game_interface.get_game_status()),
                new_obs=self.features.get_features(),
                done=not self.game_interface.in_game()
            )
            
            # Update Beliefs:
            self.behaviour_dist.update_beliefs(transition)
            
            # Save metrics:
            predicted_policy = self.behaviour_dist.get_best_policy()
            guessed_teams.append(predicted_policy.team_name)
            b_dist_buffer.append(self.behaviour_dist.get_probabilities_dict())
            
            # Metrics:
            if "PASS" in self.actions.get_action_name(action_idx=act):
                passed_ball = True
        metrics = EpisodeMetrics(
            touched_ball=touched_ball,
            passed_ball=passed_ball)
        return guessed_teams, b_dist_buffer, metrics
    
    def play(self, num_episodes: int, verbose: bool = False):
        """
        @param num_episodes: number of episodes to train in this iteration
        @raise ServerDownError
        @return: Selected Teams, Game Metrics
        """
        game_metrics = GameMetrics()
        game_metrics.set_correct_team(self.team_name)
        # Predicted Teams Distributions
        selected_teams = list()
        game_results = list()
        
        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return selected_teams, game_results, \
                       game_metrics.export_to_dict()
            
            # Update features:
            self.features.re_calculate_features(
                observation=self.game_interface.get_observation(),
                last_player_touch_ball_uniform_num=0)
            # Play episode:
            guessed_teams, b_dist_buffer, ep_metrics = \
                self._play_episode(verbose=verbose)
            goal: bool = self.game_interface.scored_goal()
            
            # Update auxiliary variables:
            game_metrics.add_episode_metrics(
                ep_metrics,
                goal=goal,
                guessed_teams=guessed_teams
            )
            
            game_results.append(1 if goal else 0)
            # Selected Teams:
            aux_dict = dict()
            for ep_dist in b_dist_buffer:
                for team, val in ep_dist.items():
                    try:
                        aux_dict[team] += val
                    except KeyError:
                        aux_dict[team] = val
            # Average the distribution over the episode's steps:
            num_ep = len(b_dist_buffer)
            for team, val in aux_dict.items():
                aux_dict[team] = val / num_ep
            selected_teams.append(aux_dict)
            
            # Game Reset
            self.game_interface.reset()
        
        metrics_dict = game_metrics.export_to_dict()
        metrics_dict["teams"] = [policy.team_name for policy in self.policies]
        metrics_dict["correct_team"] = self.team_name
        if verbose:
            print(f"[Game Metrics] {metrics_dict}")
        return selected_teams, game_results, metrics_dict
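
Given a constructed player (see the sketch after Code Example #1), a hypothetical evaluation run with placeholder values:

selected_teams, game_results, metrics = player.play(num_episodes=25,
                                                    verbose=True)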
Code Example #6
# Third-party import used below; project-local names (GameInterface,
# PlasticFeatures, PlasticActions, DQN, GameMetrics, EpisodeMetrics,
# LearnBuffer, Transition, ServerDownError, GOAL, ...) are assumed to be
# imported from the repository's own modules.
import numpy as np


class Player:
    def __init__(self,
                 team_name: str,
                 num_opponents: int,
                 num_teammates: int,
                 model_file: str,
                 epsilon: float = 1.0,
                 port: int = 6000):
        # Game Interface:
        self.game_interface = GameInterface(team_name=team_name,
                                            num_opponents=num_opponents,
                                            num_teammates=num_teammates,
                                            port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = PlasticActions(num_team=num_teammates,
                                      features=self.features,
                                      game_interface=self.game_interface)
        # Agent instance:
        self.epsilon = epsilon
        self.dqn = DQN.load(load_file=model_file)

    def exploit_actions(self, state: np.ndarray, verbose: bool = False) -> int:
        q_predict = self.dqn.predict(state)[0]

        # Mask illegal actions with a large negative value:
        legal_actions = self.actions.get_legal_actions()
        for i in range(len(q_predict)):
            if i not in legal_actions:
                q_predict[i] = -2000
        # Greedy choice:
        max_list = np.where(q_predict == q_predict.max())
        if len(max_list[0]) > 1:
            action = np.random.choice(max_list[0])
        else:
            action = np.argmax(q_predict)
        if verbose:
            print("Q values {} -> {}".format(q_predict, int(action)))
        return int(action)

    def explore_actions(self):
        legal_actions: range = self.actions.get_legal_actions()
        random_action = np.random.choice(legal_actions)
        return random_action

    def act(self,
            state: np.ndarray,
            metrics: GameMetrics,
            verbose: bool = False):
        if np.random.random() < self.epsilon:  # Explore
            if verbose: print("[ACT] Explored")
            metrics.inc_num_exploration_steps()
            return self.explore_actions()
        else:  # Exploit
            if verbose: print("[ACT] Exploit")
            metrics.inc_num_exploitation_steps()
            return self.exploit_actions(state)

    def get_reward(self, game_status: int) -> int:
        if game_status == GOAL:
            reward = 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            reward = -1000
        else:
            reward = -1
        return reward

    def play_episode(self, game_metrics: GameMetrics, verbose: bool = False):
        # auxiliary structures:
        episode_buffer = list()
        # metrics:
        touched_ball = False
        passed_ball = False
        scored_goal = False
        # auxiliary:
        last_act = None
        while self.game_interface.in_game():
            if self.features.has_ball():
                touched_ball = True
            # Update environment features:
            features_array = self.features.get_features()
            # Act:
            act = self.act(features_array, metrics=game_metrics, verbose=False)
            if verbose:
                if act == last_act:
                    log_action = False
                else:
                    print(f"{self.features.team_ball_possession}; "
                          f"{self.features.has_ball()}")
                    log_action = True
                self.actions.execute_action(act, verbose=log_action)
                last_act = act
            else:
                self.actions.execute_action(act, verbose=False)

            # Store transition:
            # (obs, action, reward, new obs, done?)
            transition = Transition(obs=features_array,
                                    act=act,
                                    reward=self.get_reward(
                                        self.game_interface.get_game_status()),
                                    new_obs=self.features.get_features(),
                                    done=not self.game_interface.in_game())
            episode_buffer.append(transition)

            # Metrics:
            if "PASS" in self.actions.get_action_name(action_idx=act):
                passed_ball = True

        if self.game_interface.scored_goal():
            uniform = self.game_interface.hfo.getUnum()
            if self.game_interface.last_player_to_touch_ball == uniform:
                scored_goal = True

        metrics = EpisodeMetrics(touched_ball=touched_ball,
                                 passed_ball=passed_ball,
                                 scored_goal=scored_goal)
        return episode_buffer, metrics

    def play(self, num_episodes: int, verbose: bool = False):
        """
        @param num_episodes: number of episodes to train in this iteration
        @raise ServerDownError
        @return: Game Metrics
        """
        experience_buffer = LearnBuffer()
        game_metrics = GameMetrics()

        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! Test {}/{}".format(ep, num_episodes))
                return experience_buffer, game_metrics.export_to_dict()

            # Update features:
            self.features.re_calculate_features(
                observation=self.game_interface.get_observation(),
                last_player_touch_ball_uniform_num=0)
            # Play episode:
            ep_buffer, ep_metrics = self.play_episode(game_metrics, verbose)
            # Save episode:
            experience_buffer.save_episode(ep_buffer)

            # Update auxiliary variables:
            game_metrics.add_episode_metrics(
                ep_metrics, goal=self.game_interface.scored_goal())

            # Game Reset
            self.game_interface.reset()

        return experience_buffer, game_metrics.export_to_dict()
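
A hypothetical end-to-end sketch connecting the two classes (model file, epsilon, and episode count are placeholders): the Player collects experience with an epsilon-greedy policy, which would then be pickled into the step files that Trainer.load_experience_from_dir() reads.

player = Player(team_name="base", num_opponents=2, num_teammates=1,
                model_file="models/dqn.model", epsilon=0.3)
experience_buffer, metrics = player.play(num_episodes=100)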