Example #1
    def play_with_ai_white(self, ai_agent=ControllerConfig.AI_AGENT):
        """
        Returns
        -------
        dict
            Dict of possible action and state
        """
        self.state = State()
        self.state.initial_state()
        self.player_vs_ai_white = True
        state_dict = AIElements.get_state_dict(self.state)
        possible_action = AIElements.get_possible_action(self.state)
        self.possible_action_keys = possible_action.keys()

        if ai_agent == 'random':
            self.ai_agent = RandomAgent()
        elif ai_agent == 'minimaxab':
            self.ai_agent = MinimaxABAgent(player_color=0)
        elif ai_agent == 'azero':
            self.ai_agent = AlphaZeroAgent()

        self.old_state_reward = deepcopy(self.state)

        return {
            "state": state_dict,
            "possible_action": possible_action,
            "task": "CHANGE_PLAYER"
        }
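
The if/elif chain above selects the agent implementation from a configuration string. Below is a minimal, self-contained sketch of the same dispatch written as a dict-based factory; StubAgent and build_agent are hypothetical stand-ins for the project's RandomAgent, MinimaxABAgent and AlphaZeroAgent classes, used only to keep the sketch runnable.

# A self-contained sketch of the string-to-agent dispatch above, as a dict-based
# factory. StubAgent is a hypothetical stand-in for the real agent classes.
class StubAgent:
    def __init__(self, name, **kwargs):
        self.name, self.kwargs = name, kwargs

AGENT_FACTORY = {
    'random': lambda: StubAgent('random'),
    'minimaxab': lambda: StubAgent('minimaxab', player_color=0),
    'azero': lambda: StubAgent('azero'),
}

def build_agent(agent_name):
    if agent_name not in AGENT_FACTORY:
        raise ValueError("Unknown AI agent: %s" % agent_name)
    return AGENT_FACTORY[agent_name]()

print(build_agent('minimaxab').kwargs)   # {'player_color': 0}
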
Example #2
    def _minimax(self, current_depth, state, is_max_turn):
        """
        Minimax Helper
        :param current_depth: the current tree depth in the recursion
        :param state: the current state in the tree node
        :param is_max_turn: bool check the current's node maximizer or not?
        :return:
        """
        if current_depth == self.max_depth or state.is_terminal():
            return AIElements.evaluation_function(state, self.player_color), ""

        self.node_expanded += 1
        possible_action = AIElements.get_possible_action(state)
        key_of_actions = list(possible_action.keys())

        shuffle(key_of_actions)  #randomness
        best_value = float('-inf') if is_max_turn else float('inf')
        action_target = ""
        for action_key in key_of_actions:
            new_state = AIElements.result_function(state,
                                                   possible_action[action_key])

            eval_child, action_child = self._minimax(current_depth + 1,
                                                     new_state,
                                                     not is_max_turn)

            if is_max_turn and best_value < eval_child:
                best_value = eval_child
                action_target = action_key

            elif (not is_max_turn) and best_value > eval_child:
                best_value = eval_child
                action_target = action_key

        return best_value, action_target
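
The helper above is the plain minimax recursion: the maximizer keeps the largest child value, the minimizer the smallest, and the recursion bottoms out at max_depth or a terminal state, where AIElements.evaluation_function supplies the utility. Below is a minimal, self-contained sketch of the same recursion on an invented two-ply game tree; toy_minimax, TOY_TREE and LEAF_VALUES are made up for illustration and do not come from the project.

# A self-contained toy version of the recursion above. The game tree and leaf
# values are invented for illustration only.
TOY_TREE = {
    'root': {'a': 'A', 'b': 'B'},
    'A': {'a1': 'A1', 'a2': 'A2'},
    'B': {'b1': 'B1', 'b2': 'B2'},
}
LEAF_VALUES = {'A1': 3, 'A2': 5, 'B1': 2, 'B2': 9}

def toy_minimax(node, is_max_turn):
    if node in LEAF_VALUES:                      # terminal node: return its utility
        return LEAF_VALUES[node], ""
    best_value = float('-inf') if is_max_turn else float('inf')
    action_target = ""
    for action_key, child in TOY_TREE[node].items():
        eval_child, _ = toy_minimax(child, not is_max_turn)
        if is_max_turn and eval_child > best_value:
            best_value, action_target = eval_child, action_key
        elif not is_max_turn and eval_child < best_value:
            best_value, action_target = eval_child, action_key
    return best_value, action_target

print(toy_minimax('root', True))   # (3, 'a'): max picks the subtree whose minimum is largest
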
Example #3
    def choose_action(self, state):
        """
        Predict the move using AlphaZero algorithm

        Parameters
        ----------
        state : State
            unused. The purpose of this parameter is to match the other class.
        float, str:
            The evaluation or utility and the action key name
        """
        from ai_modules.ai_elements import AIElements
        import numpy as np
        self.mcts.self_play()
        action_proba = np.array(self.mcts.get_action_proba(temperature=0))
        action = np.random.choice(len(action_proba), p=action_proba)
        action_key = self.ae.inverse_transform([action])[0]
        possible_action = AIElements.get_possible_action(
            self.stacked_state.head)
        new_state = AIElements.result_function(self.stacked_state.head,
                                               possible_action[action_key])
        self.stacked_state.append(new_state)
        self.mcts.update_root(action_key)
        return (action_key, possible_action[action_key])
Example #4
def fight_agent(best_model: str,
                current_model: str,
                ae,
                round_fight=AlphaZeroConfig.ROUND_ARENA,
                max_turn=AlphaZeroConfig.MAX_TURN_ARENA,
                max_simulation=AlphaZeroConfig.MAX_SIMULATION_ARENA):
    """
    The pitted 2 agents. We will check who is the best here.
    :param best_model: The current best model file path
    :param current_model: The current model file path
    :param ae: The Action Encoder
    :param round_fight: number of round to determine the winner
    :param max_turn: The maximum turn of the game. If the current turn is higher than max turn.
        It will be cut and the outcome of the game is draw.
    :param max_simulation: The maximum of simulation
    :return: dict, The dictionary of the score
    """
    from ai_modules.reinforcement_algorithm import AlphaZeroAgent

    loss_win = {0: 0, 1: 0}
    for round in range(round_fight):
        print("ROUND {}".format(round + 1))
        terminal = False
        count_turn = 1
        state = State()
        state.initial_state()
        best_model_agent = AlphaZeroAgent(state, max_simulation,
                                          best_model)  # 1
        current_model_agent = None  # 0
        while not terminal and count_turn <= max_turn:
            print("=======TURN {} ========".format(count_turn))
            state.print_board()
            current_player_turn = state.get_player_turn()
            if current_player_turn == 1:
                key, dict_key = best_model_agent.choose_action(state)
                state = AIElements.result_function(state, dict_key)
                if current_model_agent is not None:
                    current_model_agent.enemy_turn_action(key, state)
            else:
                if current_model_agent is None:
                    current_model_agent = AlphaZeroAgent(
                        state, max_simulation, current_model)
                key, dict_key = current_model_agent.choose_action(state)
                state = AIElements.result_function(state, dict_key)
                best_model_agent.enemy_turn_action(key, state)
            print("Player %d choose action %s" % (current_player_turn, key))

            game_ended = state.is_terminal()
            if game_ended:
                print("Player {} Win".format(count_turn % 2))
                loss_win[(current_player_turn) % 2] += 1
                terminal = True
            count_turn += 1
            if not terminal and count_turn > max_turn:
                print("ROUND {} DRAW".format(round + 1))
    return loss_win
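
fight_agent returns the score dict loss_win, where index 0 counts wins by the current model (playing as player 0) and index 1 counts wins by the best model (playing as player 1), following the agent assignment inside the loop. A typical AlphaZero arena then promotes the current model only if its share of decided games exceeds a threshold; the sketch below shows that decision, with the 0.55 threshold being an assumed value for illustration rather than something read from AlphaZeroConfig.

# A minimal sketch of how the arena score could decide promotion of the current
# model. The 0.55 threshold is an assumption for illustration only.
def should_promote(loss_win, threshold=0.55):
    decided_games = loss_win[0] + loss_win[1]
    if decided_games == 0:          # every round ended in a draw
        return False
    return loss_win[0] / decided_games > threshold

print(should_promote({0: 6, 1: 4}))   # True: the current model won 60% of decided games
print(should_promote({0: 0, 1: 0}))   # False: nothing was decided
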
Example #5
    def choose_action(self, state):
        """
        Predict the move using minimax alpha beta pruning algorithm

        Parameters
        ----------
        state : State

        Returns
        -------
        float, str:
            The evaluation or utility and the action key name
        """
        self.node_expanded = 0

        start_time = time.time()

        print("MINIMAX AB : Wait AI is choosing")
        list_action = AIElements.get_possible_action(state)
        eval_score, selected_key_action = self._minimax(
            0, state, True, float('-inf'), float('inf'))
        print("MINIMAX : Done, eval = %d, expanded %d" %
              (eval_score, self.node_expanded))
        print("--- %s seconds ---" % (time.time() - start_time))

        return (selected_key_action, list_action[selected_key_action])
Example #6
    def get_whattodo_view(self):
        """
        Give the view the dict that tell the possible action on this turn and the task
        that the view should do
        :return: dict
        """
        params_view_action = {}

        self.state.print_board()
        if AIElements.is_over(self.state):
            params_view_action['task'] = 'END_GAME'
            print("test")
            return params_view_action
        if self.two_players:
            params_view_action['task'] = 'CHANGE_PLAYER'
            params_view_action['state'] = AIElements.get_state_dict(self.state)
            possible_action = AIElements.get_possible_action(self.state)
            params_view_action['possible_action'] = possible_action
            self.possible_action_keys = possible_action.keys()
        if self.player_vs_ai_white:
            self.possible_action_keys = AIElements.get_possible_action(
                self.state).keys()
            params_view_action['task'] = 'AI_MOVE'
            ai_key_action, ai_action_params = self.ai_agent.choose_action(
                self.state)
            previous_state = deepcopy(self.state)
            self.receive_input_action_play(ai_key_action, ai_action_params)
            if AIElements.is_over(self.state):
                params_view_action['end'] = True
                params_view_action['task'] = 'END_GAME'
                return params_view_action
            print("Reward Function is %.2f" % (AIElements.reward_function(
                self.old_state_reward, self.state, 1)))  #Black
            self.old_state_reward = deepcopy(self.state)
            state_dict = AIElements.get_state_dict(self.state)
            previous_state_dict = AIElements.get_state_dict(previous_state)
            possible_action = AIElements.get_possible_action(self.state)
            previous_mana = AIElements.get_players_mana(previous_state)

            params_view_action['state'] = state_dict
            params_view_action["prev_state"] = previous_state_dict
            params_view_action["ai_action"] = ai_action_params
            params_view_action["prev_mana"] = previous_mana
            params_view_action["possible_action"] = possible_action
            self.possible_action_keys = possible_action.keys()
        return params_view_action
Example #7
    def _minimax(self, current_depth, state, is_max_turn, alpha, beta):
        """
        Helper function of minimax
        :param current_depth: The current depth on the tree in recursive
        :param state: State of the current node in recursive
        :param is_max_turn: Check if the current node is the max turn in recursive
        :param alpha: parameter of AB Prunning, save the current maximizer best value
        :param beta: parameter of AB Prunning, save the current minimizer best value
        :return: int , str The value of the best action and the name of the action
        """
        if current_depth == self.max_depth or state.is_terminal():
            return AIElements.evaluation_function(state, self.player_color), ""

        self.node_expanded += 1

        possible_action = AIElements.get_possible_action(state)
        key_of_actions = list(possible_action.keys())

        shuffle(key_of_actions)  # add randomness here
        best_value = float('-inf') if is_max_turn else float('inf')
        action_target = ""
        for action_key in key_of_actions:
            new_state = AIElements.result_function(state,
                                                   possible_action[action_key])

            eval_child, action_child = self._minimax(current_depth + 1,
                                                     new_state,
                                                     not is_max_turn, alpha,
                                                     beta)

            if is_max_turn and best_value < eval_child:
                best_value = eval_child
                action_target = action_key
                alpha = max(alpha, best_value)
                if beta <= alpha:
                    break

            elif (not is_max_turn) and best_value > eval_child:
                best_value = eval_child
                action_target = action_key
                beta = min(beta, best_value)
                if beta <= alpha:
                    break

        return best_value, action_target
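
Compared with the plain helper in Example #2, this version threads alpha (the maximizer's best guaranteed value) and beta (the minimizer's best guaranteed value) through the recursion and stops expanding children as soon as beta <= alpha. The self-contained sketch below counts visited leaves on an invented tree to show the effect of the bounds; the tree, values and counter are made up for illustration.

# A self-contained toy illustration of the alpha/beta bounds. The tree and leaf
# values are invented; the visited list only shows which leaves get evaluated.
LEAVES = {'A1': 3, 'A2': 5, 'B1': 2, 'B2': 9}
TREE = {'root': ['A', 'B'], 'A': ['A1', 'A2'], 'B': ['B1', 'B2']}
visited = []

def toy_alphabeta(node, is_max_turn, alpha, beta):
    if node in LEAVES:
        visited.append(node)
        return LEAVES[node]
    best_value = float('-inf') if is_max_turn else float('inf')
    for child in TREE[node]:
        eval_child = toy_alphabeta(child, not is_max_turn, alpha, beta)
        if is_max_turn:
            best_value = max(best_value, eval_child)
            alpha = max(alpha, best_value)
        else:
            best_value = min(best_value, eval_child)
            beta = min(beta, best_value)
        if beta <= alpha:   # the opponent already has a better guarantee elsewhere
            break
    return best_value

print(toy_alphabeta('root', True, float('-inf'), float('inf')), visited)
# 3 ['A1', 'A2', 'B1']: B2 is never visited because after B1 = 2 the minimizer's
# bound (beta = 2) already falls below the maximizer's alpha = 3.
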
Example #8
    def play_with_two_players_start(self):
        """
        Return the initial state

        Returns
        -------
        dict
            Dict of the possible actions and the state
        """
        self.state = State()
        self.state.initial_state()
        self.two_players = True
        state_dict = AIElements.get_state_dict(self.state)
        possible_action = AIElements.get_possible_action(self.state)
        self.possible_action_keys = possible_action.keys()
        return {
            "state": state_dict,
            "possible_action": possible_action,
            "task": "CHANGE_PLAYER"
        }
Example #9
    def receive_input_action_play(self, input_key, input_params):
        """
        Process the input from the user in the view
        :param input_key: str, the key of the action
        :param input_params: dict, the parameter of the action
        :return: bool, tell that the action is present in the possible action
        """
        if input_key in self.possible_action_keys:
            self.state = AIElements.result_function(self.state, input_params)

            ## Useful for alpha zero only
            self.ai_agent.enemy_turn_action(input_key, input_params)

            index_player = (AIElements.get_player(self.state) + 1) % 2
            print("TURN %d" % (self.state.turn))
            print("The Evaluation of Player %d is %.2f" %
                  (index_player,
                   AIElements.evaluation_function(self.state, index_player)))
            return True
        else:
            return False
Example #10
    def choose_action(self, state):
        """
        Predict the move with uniform proba random.

        Parameters
        ----------
        state : State

        Returns
        -------
        float, str:
            The evaluation or utility and the action key name
        """
        import random
        list_action = AIElements.get_possible_action(state)
        key_list_action = list_action.keys()
        rand_int = random.randint(0, len(key_list_action) - 1)
        selected_key_action = list(key_list_action)[rand_int]
        return (selected_key_action, list_action[selected_key_action])
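
The same uniform choice can be written more directly with random.choice over the action keys. A minimal equivalent sketch, with an invented possible-action dict:

# An equivalent, more direct way to pick a uniformly random action key.
# The possible-action dict below is invented for illustration.
import random

possible_action = {'move_a': {'action': 'move'}, 'attack_b': {'action': 'attack'}}
selected_key_action = random.choice(list(possible_action))
print(selected_key_action, possible_action[selected_key_action])
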
Example #11
    def get_deep_representation_stack(self):
        """
        The representation of state to be input of the deep learning
        For more details, see my medium post (Part 3)
        :return: the state representation
        """
        input_network = np.zeros((9, 9, self.planes_total))
        counter_iter = self.max_len - len(self.deque_collection)
        for state in reversed(self.deque_collection):
            planes_index = counter_iter * self.max_features
            counter_iter += 1
            all_pawn_list = state.white_pawn_list + state.black_pawn_list + [state.white_king, state.black_king]
            possible_action = AIElements.get_possible_action(state)
            all_rune_list = state.rune_list
            input_network[0, 4, planes_index + 22] = state.turn % 5
            input_network[4, 4, planes_index + 23] = state.turn % 5
            input_network[8, 4, planes_index + 24] = state.turn % 5

            for rune in all_rune_list:
                input_network[rune.x,rune.y, planes_index + 25] = 1
            for i in possible_action:
                if i != 'skip':
                    possible_action_dict = possible_action[i]
                    # player_index = possible_action_dict['player_index']
                    x = possible_action_dict['pawn_x']
                    y = possible_action_dict['pawn_y']
                    if 'player' in possible_action_dict:
                        index_color = possible_action_dict['player']
                        input_network[x, y, planes_index + 26 + index_color] += 1
                    if 'player_index' in possible_action_dict:
                        index_color = possible_action_dict['player_index']
                        input_network[x, y, planes_index + 26 + index_color] += 1
            for pawn in all_pawn_list:
                x, y = pawn.x, pawn.y
                player_mana = pawn.player.mana
                if not pawn.dead:
                    index_color = 0 if pawn.player.color == 0 else 1

                    pawn_hp = pawn.hp
                    input_network[x, y, planes_index + index_color + 0] = pawn_hp

                    pawn_atk = pawn.atk
                    input_network[x, y, planes_index + index_color + 2] = pawn_atk

                    pawn_step = pawn.step
                    input_network[x, y, planes_index + index_color + 4] = pawn_step

                    input_network[x, y, planes_index + index_color + 6] = 1 # location pawn
                    if isinstance(pawn, KnightPawn):
                        input_network[x, y, planes_index + index_color + 8] = 1
                    elif isinstance(pawn, RookPawn):
                        input_network[x, y, planes_index + index_color + 10] = 1
                    elif isinstance(pawn, BishopPawn):
                        input_network[x, y, planes_index + index_color + 12] = 1
                    elif isinstance(pawn, SoldierPawn):
                        input_network[x, y, planes_index + index_color + 14] = 1
                    elif isinstance(pawn, QueenPawn):
                        input_network[x, y, planes_index + index_color + 16] = 1
                    elif isinstance(pawn, King):
                        input_network[x, y, planes_index + index_color + 18] = 1
                    input_network[x, y, planes_index + index_color + 20] = player_mana
        return input_network
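
The method above packs a short history of states into a 9 x 9 x planes_total tensor, giving each remembered state its own group of max_features planes (HP, attack, step, piece type, mana, runes and turn counters, offset per colour). The sketch below shows only the plane-offset bookkeeping with invented dimensions and a toy presence/HP feature pair; none of the names come from the project.

# A self-contained sketch of the plane-stacking idea used above. The board size,
# feature count and the toy "pawns" are invented for illustration only.
import numpy as np

BOARD_SIZE = 9
MAX_FEATURES = 2     # toy features: plane 0 = piece presence, plane 1 = piece HP
MAX_LEN = 3          # how many past states are stacked

def encode_stack(states):
    """states: list of states (oldest first), each a list of (x, y, hp) tuples."""
    planes = np.zeros((BOARD_SIZE, BOARD_SIZE, MAX_LEN * MAX_FEATURES))
    counter_iter = MAX_LEN - len(states)     # missing history stays as zero planes
    for state in reversed(states):           # newest state gets the first used group
        planes_index = counter_iter * MAX_FEATURES
        counter_iter += 1
        for x, y, hp in state:
            planes[x, y, planes_index + 0] = 1    # presence plane
            planes[x, y, planes_index + 1] = hp   # HP plane
    return planes

stack = encode_stack([[(4, 3, 3)], [(4, 4, 3)]])   # two remembered states
print(stack.shape, stack[4, 4, :])
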
Example #12
def do_self_play_episode(
        stacked_state,
        mcts,
        ae: ActionEncoder,
        greed=False,
        pov=0,
        temperature_turn_end=AlphaZeroConfig.TEMPERATURE_END_STEP,
        greedy_turn=AlphaZeroConfig.GREEDY_TURN_MIN,
        max_turn_episode=AlphaZeroConfig.MAX_TURN_SIMULATION):
    """
    Self play an episode. This function will simulate MCTS every turns.
    :param stacked_state: The state that is stacked.
    :param mcts: a MCTS that will be simulated every turns.
    :param ae: Action Encoder
    :param greed: Hack that will be used to make the agent prioritize attacking and promoting.
    :param pov: The chosen point of view of player
    :param temperature_turn_end: Make the temperature to 0 on the given turn
    :return: Training data that will be appended to the global list of training data
    """
    terminal = False
    counter_step = 0
    list_training = []
    while not terminal:
        counter_step += 1
        print("Step : %d" % (counter_step))
        print("Turn : %d" % (stacked_state.head.turn))
        print("Player in state : %d" % (stacked_state.head.get_player_turn()))
        print("Player in tree : %d" %
              (mcts.root.stacked_state.head.get_player_turn()))

        temperature = 1
        if counter_step > temperature_turn_end:
            temperature = 0

        # Hack if counter_step > greedy_turn, greedily attack enemies
        if counter_step > greedy_turn and greed:
            print("Greedy Mode Activate")
            mcts.self_play(greed)
        else:
            mcts.self_play()
        action_proba = np.array(mcts.get_action_proba(temperature))

        action = np.random.choice(len(action_proba), p=action_proba)
        action_key = ae.inverse_transform([action])[0]
        possible_action = AIElements.get_possible_action(stacked_state.head)
        # Append list training
        if stacked_state.head.get_player_turn() != pov:
            training_data_tobe = HelperTrainingExample(
                mirror_stacked_state(deepcopy(stacked_state)),
                (stacked_state.head.get_player_turn()),
                ae.array_mirrored(action_proba))
        else:
            training_data_tobe = HelperTrainingExample(
                deepcopy(stacked_state), stacked_state.head.get_player_turn(),
                action_proba)
        print("Action_proba shape {}".format(action_proba.shape))

        list_training.append(training_data_tobe)

        # TODO : add v_threshold to cut
        stacked_state.head.print_board()
        print('Mirrored')
        mirror_stacked_state(deepcopy(stacked_state)).head.print_board()

        print("Player %d choose action %s" %
              (stacked_state.head.get_player_turn(), action_key))
        print("Next mean action value : %.4f" %
              (mcts.root.q_state_action[action_key]))

        new_state = AIElements.result_function(stacked_state.head,
                                               possible_action[action_key])
        stacked_state.append(new_state)
        mcts.update_root(action_key)

        terminal = stacked_state.head.is_terminal()
        if terminal:
            # Update the reward in list_training
            mcts.self_play()  # to fill the v variable
            reward = mcts.root.v  # reward should be -1
            loser_player = stacked_state.head.get_player_turn()
            for i in list_training:
                if i.current_player == loser_player:
                    i.reward = reward
                else:
                    i.reward = -reward
        if counter_step > max_turn_episode:
            # Terminate episode, set reward to 0
            reward = 0
            for i in list_training:
                i.reward = reward
            terminal = True
        print("-----")
    return list_training
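
The temperature handling above follows the usual AlphaZero convention: with temperature 1 the move is sampled in proportion to the MCTS visit counts, and with temperature 0 the most-visited move is chosen outright. A sketch of that conversion is below, assuming the standard definition; the real logic lives in mcts.get_action_proba, which is not part of this example.

# A sketch of turning MCTS visit counts into an action distribution under a
# temperature, following the usual AlphaZero convention; the project's own
# implementation lives in mcts.get_action_proba and is not shown here.
import numpy as np

def action_proba_from_visits(visit_counts, temperature):
    visit_counts = np.asarray(visit_counts, dtype=np.float64)
    if temperature == 0:                     # play the most visited action deterministically
        proba = np.zeros_like(visit_counts)
        proba[np.argmax(visit_counts)] = 1.0
        return proba
    powered = visit_counts ** (1.0 / temperature)
    return powered / powered.sum()

visits = [10, 30, 5, 55]
print(action_proba_from_visits(visits, 1))   # proportional to the visit counts
print(action_proba_from_visits(visits, 0))   # one-hot on the most visited action
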
Example #13
    def expand_node(self, model_deep_net, player_color, label_encoder,
                    epsilon=AlphaZeroConfig.MCTS_EPSILON,
                    alpha_diri=AlphaZeroConfig.MCTS_ALPHA_DIRICHLET,
                    cpuct=AlphaZeroConfig.MCTS_PUCT,
                    greed_attack=False):
        """
        This function covers two steps of the MCTS: Select, and Expand & Evaluate.
        :param model_deep_net: The neural network model
        :param player_color: unused in this function
        :param label_encoder: The encoder used to encode the action keys
        :param epsilon: weight of the Dirichlet noise mixed into the prior at the root
        :param alpha_diri: alpha parameter of the Dirichlet distribution
        :param cpuct: exploration constant of the PUCT formula in AlphaZero
        :param greed_attack: HACK! the agent will prioritize attacking and promoting
        :return: None. The node statistics are updated in place.
        """
        terminal = AIElements.is_over(self.stacked_state.head)
        self.is_terminal = terminal
        if not terminal:
            possible_action = AIElements.get_possible_action(self.stacked_state.head)
            possible_action_keys = list(possible_action.keys())

            if self.p_state is None:
                """
                    Expand and Evaluate goes here!
                """
                if self.stacked_state.head.get_player_turn() == self.maximizer:
                    state_stack_representation = np.array([self.stacked_state.get_deep_representation_stack()])
                else:
                    state_stack_representation = mirror_stacked_state(self.stacked_state)
                    state_stack_representation = np.array([state_stack_representation.get_deep_representation_stack()])

                self.p_state, self.v = model_deep_net.predict(state_stack_representation)
                self.v_ = self.v[0][0]
                self.v = self.v[0][0]
                self.p_state = self.p_state[0]

                if self.stacked_state.head.get_player_turn() != self.maximizer:
                    self.p_state = label_encoder.array_mirrored(self.p_state)
                possible_action_ohe = label_encoder.transform(possible_action_keys).sum(axis=0)
                self.p_state *= possible_action_ohe
                sum_policy_state = np.sum(self.p_state)
                if sum_policy_state > 0:
                    ## normalize to sum 1
                    self.p_state /= sum_policy_state
                else:
                    print("All valid moves were masked, do workaround.")
                    self.p_state += possible_action_ohe
                    self.p_state /= np.sum(self.p_state)

                # Initialize num and q
                for action in possible_action_keys:
                    self.num_state_action[action] = 0
                    self.q_state_action[action] = 0
                    next_state = AIElements.result_function(self.stacked_state.head, possible_action[action])
                    new_stacked_state = deepcopy(self.stacked_state)
                    new_stacked_state.append(next_state)
                    if action not in self.edge_action:
                        self.edge_action[action] = NodeMCTS(new_stacked_state, parent=self, root=False)

            else:
                """
                    Select goes here
                """
                best_action = ""
                best_upper_confidence = -float('inf')

                dirichlet_prob = np.random.dirichlet([alpha_diri] * len(possible_action_keys))
                counter_loop = 0

                # Randomize possible_action_keys
                random.shuffle(possible_action_keys)
                for action in possible_action_keys:
                    # Get the index of the action
                    index_action = label_encoder.le.transform([action])[0]
                    q_state_action_val = 0
                    num_state_action_val = 0
                    if action in self.q_state_action and action in self.num_state_action:
                        q_state_action_val = self.q_state_action[action]
                        num_state_action_val = self.num_state_action[action]
                    if self.root:
                        prior_action = (1 - epsilon) * self.p_state[index_action] + \
                                       epsilon * dirichlet_prob[counter_loop]
                        upper_confidence = q_state_action_val + \
                                           cpuct * prior_action * \
                                           np.sqrt(self.num_state) / (1 + num_state_action_val)
                    else:
                        upper_confidence = q_state_action_val + \
                                           cpuct * self.p_state[index_action] * \
                                           np.sqrt(self.num_state) / (1 + num_state_action_val)
                    if greed_attack and possible_action[action]['action'] == 'attack':
                        upper_confidence += AlphaZeroConfig.Q_ATTACK_GREEDY # Higher Chance to Attack
                    if greed_attack and possible_action[action]['action'] == 'promote':
                        upper_confidence += AlphaZeroConfig.Q_PROMOTE_GREEDY # Higher Chance to promote
                    counter_loop += 1
                    if best_upper_confidence < upper_confidence:
                        best_upper_confidence = upper_confidence
                        best_action = action

                        # Expand the node and check if this node is terminal

                        next_state = AIElements.result_function(self.stacked_state.head, possible_action[action])
                        new_stacked_state = deepcopy(self.stacked_state)
                        new_stacked_state.append(next_state)
                        if action not in self.edge_action:
                            self.edge_action[action] = NodeMCTS(new_stacked_state, parent=self, root=False)

                self.selected_action = best_action

        else:
            self.v = self.stacked_state.head.sparse_eval(self.stacked_state.head.get_player_turn())
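
The selection branch above ranks actions by the PUCT score Q(s,a) + cpuct * P(s,a) * sqrt(N(s)) / (1 + N(s,a)), mixing Dirichlet noise into the prior P at the root only. A minimal numpy sketch of that score follows; the priors, visit counts, Q values and the epsilon/alpha defaults are invented for illustration and are not read from AlphaZeroConfig.

# A self-contained sketch of the PUCT score computed in the selection branch
# above. All numbers and default hyperparameters are invented for illustration.
import numpy as np

def puct_scores(q, p, n_state_action, n_state, cpuct=1.0,
                root=False, epsilon=0.25, alpha_diri=0.3):
    q = np.asarray(q, dtype=np.float64)
    p = np.asarray(p, dtype=np.float64)
    n_state_action = np.asarray(n_state_action, dtype=np.float64)
    if root:   # only the root node mixes Dirichlet noise into the prior
        noise = np.random.dirichlet([alpha_diri] * len(p))
        p = (1 - epsilon) * p + epsilon * noise
    return q + cpuct * p * np.sqrt(n_state) / (1 + n_state_action)

scores = puct_scores(q=[0.1, -0.2, 0.0],
                     p=[0.5, 0.3, 0.2],
                     n_state_action=[4, 1, 0],
                     n_state=5,
                     root=True)
print(scores, "-> select action index", int(np.argmax(scores)))
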