def fight_agent(best_model: str,
                current_model: str,
                ae,
                round_fight=AlphaZeroConfig.ROUND_ARENA,
                max_turn=AlphaZeroConfig.MAX_TURN_ARENA,
                max_simulation=AlphaZeroConfig.MAX_SIMULATION_ARENA):
    """
    The pitted 2 agents. We will check who is the best here.
    :param best_model: The current best model file path
    :param current_model: The current model file path
    :param ae: The Action Encoder
    :param round_fight: number of round to determine the winner
    :param max_turn: The maximum turn of the game. If the current turn is higher than max turn.
        It will be cut and the outcome of the game is draw.
    :param max_simulation: The maximum of simulation
    :return: dict, The dictionary of the score
    """
    from ai_modules.reinforcement_algorithm import AlphaZeroAgent

    loss_win = {0: 0, 1: 0}
    for round in range(round_fight):
        print("ROUND {}".format(round + 1))
        terminal = False
        count_turn = 1
        state = State()
        state.initial_state()
        best_model_agent = AlphaZeroAgent(state, max_simulation,
                                          best_model)  # plays as player 1
        current_model_agent = None  # plays as player 0; created lazily on its first turn
        while not terminal and count_turn <= max_turn:
            print("=======TURN {} ========".format(count_turn))
            state.print_board()
            current_player_turn = state.get_player_turn()
            if current_player_turn == 1:
                key, dict_key = best_model_agent.choose_action(state)
                state = AIElements.result_function(state, dict_key)
                if current_model_agent is not None:
                    current_model_agent.enemy_turn_action(key, state)
            else:
                if current_model_agent is None:
                    current_model_agent = AlphaZeroAgent(
                        state, max_simulation, current_model)
                key, dict_key = current_model_agent.choose_action(state)
                state = AIElements.result_function(state, dict_key)
                best_model_agent.enemy_turn_action(key, state)
            print("Player %d choose action %s" % (current_player_turn, key))

            game_ended = state.is_terminal()
            if game_ended:
                print("Player {} Win".format(count_turn % 2))
                loss_win[(current_player_turn) % 2] += 1
                terminal = True
            count_turn += 1
            if not terminal and count_turn > max_turn:
                print("ROUND {} DRAW".format(round + 1))
    return loss_win
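
# Illustrative only: a minimal sketch of evaluating two saved checkpoints with
# fight_agent. The ActionEncoder name and import path below are assumptions
# (only the `ae` parameter is documented above); adjust them to the project's
# actual action-encoder class.
def example_arena_run():
    from ai_modules.reinforcement_algorithm import ActionEncoder  # hypothetical import path
    ae = ActionEncoder()
    scores = fight_agent(AlphaZeroConfig.BEST_MODEL_PATH,
                         AlphaZeroConfig.CURRENT_MODEL_PATH, ae)
    # scores maps player index to wins: 1 = best model, 0 = current model
    print(scores)
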
class GameController():
    """
    A class used to connect the view and the model.
    """
    def __init__(self):
        self.state = State()
        self.state.initial_state()
        self.possible_action_keys = []
        self.two_players = False
        self.player_vs_ai_white = False

    def play_with_two_players_start(self):
        """
        Start a two-player game and return the initial state.

        Returns
        -------
        dict
            Dict of possible actions and state
        """
        self.state = State()
        self.state.initial_state()
        self.two_players = True
        state_dict = AIElements.get_state_dict(self.state)
        possible_action = AIElements.get_possible_action(self.state)
        self.possible_action_keys = possible_action.keys()
        return {
            "state": state_dict,
            "possible_action": possible_action,
            "task": "CHANGE_PLAYER"
        }

    def play_with_ai_white(self, ai_agent=ControllerConfig.AI_AGENT):
        """
        Returns
        -------
        dict
            Dict of possible action and state
        """
        self.state = State()
        self.state.initial_state()
        self.player_vs_ai_white = True
        state_dict = AIElements.get_state_dict(self.state)
        possible_action = AIElements.get_possible_action(self.state)
        self.possible_action_keys = possible_action.keys()

        if ai_agent == 'random':
            self.ai_agent = RandomAgent()
        elif ai_agent == 'minimaxab':
            self.ai_agent = MinimaxABAgent(player_color=0)
        elif ai_agent == 'azero':
            self.ai_agent = AlphaZeroAgent()
        else:
            raise ValueError("Unknown AI agent: {}".format(ai_agent))

        self.old_state_reward = deepcopy(self.state)

        return {
            "state": state_dict,
            "possible_action": possible_action,
            "task": "CHANGE_PLAYER"
        }

    def receive_input_action_play(self, input_key, input_params):
        """
        Process an action received from the user via the view.
        :param input_key: str, the key of the action
        :param input_params: dict, the parameters of the action
        :return: bool, whether the action is among the currently possible actions
        """
        if input_key in self.possible_action_keys:
            self.state = AIElements.result_function(self.state, input_params)

            ## Useful for AlphaZero only; guarded so two-player games
            ## (which have no AI agent) do not crash.
            if hasattr(self, 'ai_agent'):
                self.ai_agent.enemy_turn_action(input_key, input_params)

            index_player = (AIElements.get_player(self.state) + 1) % 2
            print("TURN %d" % (self.state.turn))
            print("The Evaluation of Player %d is %.2f" %
                  (index_player,
                   AIElements.evaluation_function(self.state, index_player)))
            return True
        else:
            return False

    def get_whattodo_view(self):
        """
        Give the view a dict that tells it the possible actions on this turn
        and the task it should perform.
        :return: dict
        """
        params_view_action = {}

        self.state.print_board()
        if AIElements.is_over(self.state):
            params_view_action['task'] = 'END_GAME'
            print("test")
            return params_view_action
        if self.two_players:
            params_view_action['task'] = 'CHANGE_PLAYER'
            params_view_action['state'] = AIElements.get_state_dict(self.state)
            possible_action = AIElements.get_possible_action(self.state)
            params_view_action['possible_action'] = possible_action
            self.possible_action_keys = possible_action.keys()
        if self.player_vs_ai_white:
            self.possible_action_keys = AIElements.get_possible_action(
                self.state).keys()
            params_view_action['task'] = 'AI_MOVE'
            ai_key_action, ai_action_params = self.ai_agent.choose_action(
                self.state)
            previous_state = deepcopy(self.state)
            self.receive_input_action_play(ai_key_action, ai_action_params)
            if AIElements.is_over(self.state):
                params_view_action['end'] = True
                params_view_action['task'] = 'END_GAME'
                return params_view_action
            print("Reward Function is %.2f" % (AIElements.reward_function(
                self.old_state_reward, self.state, 1)))  #Black
            self.old_state_reward = deepcopy(self.state)
            state_dict = AIElements.get_state_dict(self.state)
            previous_state_dict = AIElements.get_state_dict(previous_state)
            possible_action = AIElements.get_possible_action(self.state)
            previous_mana = AIElements.get_players_mana(previous_state)

            params_view_action['state'] = state_dict
            params_view_action["prev_state"] = previous_state_dict
            params_view_action["ai_action"] = ai_action_params
            params_view_action["prev_mana"] = previous_mana
            params_view_action["possible_action"] = possible_action
            self.possible_action_keys = possible_action.keys()
        return params_view_action
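
# Illustrative only: a minimal sketch of the game loop a view might drive with
# GameController. It assumes each value of `possible_action` is the parameter
# dict accepted by receive_input_action_play; the exact key/param shapes depend
# on the game's State implementation.
def example_controller_loop():
    controller = GameController()
    view_params = controller.play_with_ai_white()
    while view_params.get('task') != 'END_GAME':
        possible_action = view_params.get('possible_action', {})
        if not possible_action:
            break
        # Pick any legal action; a real view would take this from user input.
        chosen_key = next(iter(possible_action))
        if controller.receive_input_action_play(chosen_key,
                                                possible_action[chosen_key]):
            view_params = controller.get_whattodo_view()
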
def fit_train(global_list_training,
              ae,
              model_deep_net,
              episode=AlphaZeroConfig.EPISODE,
              mtcs_sims=AlphaZeroConfig.MCTS_SIMULATION,
              best_model=None,
              greedy_episode=AlphaZeroConfig.GREEDY_EPISODE):
    """
    Train the model of neural network using the data generated from the MCTS simulation.
    :param global_list_training: the list of the data that will be used to the input of neural network
    :param ae: Action Encoder object
    :param model_deep_net: a Keras neural network model.
    :param episode: Number of game episodes
    :param mtcs_sims: Number of MCTS simulation
    :param best_model: a Keras neural network model which represents the best model
    :return:
    """
    if best_model is None:
        # If the training is from the start
        best_model = clone_model(model_deep_net.model)
        best_model.set_weights(model_deep_net.model.get_weights())
        best_model.save(AlphaZeroConfig.BEST_MODEL_PATH)

    for eps in range(episode):
        print("Episode %d" % (eps))
        print("==============")

        # For the first `greedy_episode` episodes, force attack or promote moves when possible.
        greed_is_good = eps < greedy_episode
        state = State()
        state.initial_state()
        stacked_state = StackedState(state)
        mcts = MCTreeSearch(model_deep_net.model, 1, mtcs_sims, ae,
                            stacked_state)

        new_list_training = do_self_play_episode(stacked_state, mcts, ae,
                                                 greed_is_good)
        global_list_training.extend(new_list_training)

        # Checkpoint list_training
        with open("global_list_training.p", "wb") as f:
            pickle.dump(global_list_training, f)
        deep_repr_state, action_proba, reward = parse_global_list_training(
            global_list_training)
        print("Fitting the model!")
        print("------------")
        model_deep_net.model.fit([deep_repr_state], [action_proba, reward],
                                 batch_size=AlphaZeroConfig.BATCH_SIZE_FIT,
                                 epochs=AlphaZeroConfig.EPOCHS_FIT)
        model_deep_net.model.save(AlphaZeroConfig.CURRENT_MODEL_PATH)
        print("------------")
        print("Arena!")

        dict_score = fight_agent(AlphaZeroConfig.BEST_MODEL_PATH,
                                 AlphaZeroConfig.CURRENT_MODEL_PATH, ae)
        from pprint import pprint
        print("Arena is done! 1: Best Model, 0: Current Model")
        pprint(dict_score)
        if (dict_score[0] >=
                dict_score[1] * AlphaZeroConfig.WIN_DIFFERENCE_ARENA):
            # Change the best model
            print("Change Best Model!")
            best_model = clone_model(model_deep_net.model)
            best_model.set_weights(model_deep_net.model.get_weights())
        else:
            # Revert the current model to the previous best model
            print("Reverting the current model to the best model")
            model_deep_net.model = clone_model(best_model)

            # clone_model copies only the architecture, so restore the best
            # model's weights and recompile before training continues.
            model_deep_net.model.set_weights(best_model.get_weights())
            from keras.optimizers import Adam
            model_deep_net.model.compile(
                loss=['categorical_crossentropy', 'mean_squared_error'],
                optimizer=Adam(0.001))
        best_model.save("best_model.hdf5")
        model_deep_net.model.save("checkpoint.hdf5")