Example #1
def random_game_generator() -> tuple:
    """Generate random valid board obtained by some moves from initial board.

    the maximim number of moves in the board generated is 20

    Returns
    -------
    tuple
        a random game:
            first element is a random valid board.
            second element is the turn number.
            third element is the draw counter.

    """
    current_board = initial_board_generator()
    num_of_turns = random.randint(0, 10)

    draw_counter = 0
    colour = {1: 'white', 0: 'black'}
    for turn in range(1, num_of_turns):
        # number of disks before the move:
        size_before = current_board.get_number_of_disks(None)

        boards = []
        Moves.get_all_next_boards(current_board, colour[turn % 2], boards)
        if not boards:  # no legal move is available: stop early.
            break
        # get the next board.
        current_board = random.choice(boards)

        # number of disks after the move:
        size_after = current_board.get_number_of_disks(None)

        draw_counter = update_draw_counter(draw_counter, size_before,
                                           size_after)

    return current_board, num_of_turns, draw_counter
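
This generator walks forward from the initial position, sampling one successor per turn. A minimal sketch of the same random-walk pattern on a toy game whose state is a plain integer; every name below is an illustrative stand-in, not part of the project above:

import random

def random_position(max_turns: int = 10) -> tuple:
    state = 0  # stand-in for initial_board_generator()
    num_of_turns = random.randint(0, max_turns)
    for _turn in range(1, num_of_turns):
        successors = [state + 1, state + 2]  # stand-in for the move generator
        state = random.choice(successors)
    return state, num_of_turns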
Example #2
    def choose_board(self, board: Board, turn: int,
                     draw_counter: int) -> Board:
        """Choose the best board by applying the best move on the given board.

        Parameters
        ----------
        board : Board
            the current board.
        turn : int
            turn's number.
        draw_counter : int
            counter of non-attack moves.

        Returns
        -------
        Board
            the new board, which is the result of applying
            the best valid move on the given board.

        """
        # generate all possible boards
        boards = []
        Moves.get_all_next_boards(board, self._colour, boards)
        # get the fitness of every board
        values = self.get_fitness(boards, turn + 1, draw_counter)
        # get the id of the best board
        # because our evaluation function predicts how good a board
        # is for white, when we play as 'black' we must take the
        # minimum to put white in the worst possible scenario.
        if self._colour == 'black':
            i = np.argmin(values)
        else:
            i = np.argmax(values)
        return boards[i]
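
The colour check matters because the fitness values are white-centric. A hedged illustration of just the selection step, with made-up fitness values:

import numpy as np

values = np.array([0.2, -0.5, 0.7])      # one fitness score per candidate
best_for_white = int(np.argmax(values))  # index 2: maximise white's score
best_for_black = int(np.argmin(values))  # index 1: minimise white's score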
Example #3
def get_board(board, loc1, loc2):
    """Return the next board where the disk from loc1 lands on loc2, or None."""
    boards = []
    Moves.get_next_boards(board, loc1, boards)
    for b in boards:
        if b.get_disk_at(loc2) is not None:
            return b
    return None
Example #4
    def _max(self, board: Board, depth: int, turn: int, draw_counter: int,
             alpha: float, beta: float) -> float:
        self._tot_num += 1  # increase the number of explored nodes.

        # apply the dynamic depth feature if it is enabled.
        # please note that this feature is still under test.
        if self._dynamic_depth is True:
            if self._tot_num > MiniMaxAlphaBetaSystem.maximum_nodes_number:
                return np.sum(
                    self._pred_system.predict([board], turn, draw_counter))

        # get the status of the current node.
        status = self._terminal_state(board, turn, draw_counter)
        if status is not None:
            # lose for black is win for white and vice versa.
            sign = 1 if turn % 2 == 1 else -1
            if status == 'win':
                return 100 * sign
            elif status == 'lose':
                return -100 * sign
            else:
                return 0

        # if we reach the maximum depth then stop and return
        # a prediction of the current board state.
        if depth >= self._depth:
            return np.sum(
                self._pred_system.predict([board], turn, draw_counter))

        # generate the next boards by applying the valid moves.
        boards = []
        Moves.get_all_next_boards(board, 'white', boards)

        current_max = -1e7
        size_before = board.get_number_of_disks(None)
        for b in boards:
            size_after = b.get_number_of_disks(None)
            next_draw_counter = update_draw_counter(draw_counter,
                                                    size_before, size_after)
            current_max = max(
                current_max,
                self._min(b, depth + 1, turn + 1, next_draw_counter, alpha,
                          beta))
            if current_max >= beta:  # beta cutoff.
                return current_max
            alpha = max(alpha, current_max)
        return current_max
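
The current_max >= beta test is the standard beta cutoff of alpha-beta pruning. A self-contained toy version over an explicit game tree (lists are internal nodes, numbers are leaves), for illustration only:

def alphabeta(node, maximising, alpha=-1e7, beta=1e7):
    if isinstance(node, (int, float)):  # leaf: return its value.
        return node
    if maximising:
        value = -1e7
        for child in node:
            value = max(value, alphabeta(child, False, alpha, beta))
            if value >= beta:   # beta cutoff, as in _max above.
                return value
            alpha = max(alpha, value)
        return value
    value = 1e7
    for child in node:
        value = min(value, alphabeta(child, True, alpha, beta))
        if value <= alpha:      # alpha cutoff, as in _min below.
            return value
        beta = min(beta, value)
    return value

print(alphabeta([[3, 5], [2, 9]], maximising=True))  # -> 3 (the 9 is pruned)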
Example #5
def generate_random_training_set() -> tuple:
    """Play one fully random game; return white's boards and white's final status."""
    colour = {1: 'white', 0: 'black'}
    current_board = initial_board_generator()
    boards = []  # list of white board positions through the game.
    # 2-tuple
    # first element is the status (i.e. 'win', 'lose', or 'draw').
    # second element is the colour of the player with the final status.
    final_status = (None, None)
    start_turn = 1
    draw_counter = 0
    for turn in range(start_turn, 10000):
        # store the board when it's white's turn.
        if colour[turn % 2] == 'white':
            boards.append(current_board)
        # get the status of the game; None indicates the game is still in progress.
        status = current_board.get_status(colour[turn % 2], draw_counter)

        # if the game ends set the right values to final_status
        # then exit the game
        if status is not None:
            final_status = (status, colour[turn % 2])
            break

        # number of disks before the move:
        size_before = current_board.get_number_of_disks(None)

        # get the next board by applying a uniformly random valid move.
        next_boards = []
        Moves.get_all_next_boards(current_board, colour[turn % 2],
                                  next_boards)
        current_board = random.choice(next_boards)

        # number of disks after the move:
        size_after = current_board.get_number_of_disks(None)

        draw_counter = update_draw_counter(draw_counter, size_before,
                                           size_after)

    # get the final status for white
    if final_status[1] == 'black' and final_status[0] != 'draw':
        final_status = ('win' if final_status[0] == 'lose' else 'lose',
                        final_status[1])
    return boards, final_status[0]
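
The final flip restates the result from white's point of view: a status recorded for black is inverted unless the game was a draw. The rule in isolation, as a hedged helper (the name status_for_white is made up for illustration):

def status_for_white(status: str, colour: str) -> str:
    # a 'lose' recorded for black is a win for white, and vice versa.
    if colour == 'black' and status != 'draw':
        return 'win' if status == 'lose' else 'lose'
    return status

assert status_for_white('lose', 'black') == 'win'
assert status_for_white('draw', 'black') == 'draw'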
Example #6
    def _min(self, board: Board, depth: int, turn: int, draw_counter: int,
             alpha: float, beta: float) -> float:
        self._tot_num += 1  # increase the number of explored nodes.

        # apply the dynamic depth feature if it is enabled.
        if self._dynamic_depth is True:
            if self._tot_num > MiniMaxAlphaBetaSystem.maximum_nodes_number:
                return np.sum(
                    self._pred_system.predict([board], turn, draw_counter))

        # get the status of the current node.
        status = self._terminal_state(board, turn, draw_counter)
        if status is not None:
            # lose for black is win for white and vice versa.
            sign = 1 if turn % 2 == 1 else -1
            if status == 'win':
                return 100 * sign
            elif status == 'lose':
                return -100 * sign
            else:
                return 0

        # if we reach the maximum depth, stop and return
        # a prediction of the current board state.
        if depth >= self._depth:
            return np.sum(
                self._pred_system.predict([board], turn, draw_counter))

        # generate the next boards by applying black's valid moves.
        boards = []
        Moves.get_all_next_boards(board, 'black', boards)

        current_min = 1e7
        size_before = board.get_number_of_disks(None)
        for b in boards:
            size_after = b.get_number_of_disks(None)
            next_draw_counter = update_draw_counter(draw_counter,
                                                    size_before, size_after)
            current_min = min(
                current_min,
                self._max(b, depth + 1, turn + 1, next_draw_counter, alpha,
                          beta))
            if current_min <= alpha:  # alpha cutoff.
                return current_min
            beta = min(beta, current_min)
        return current_min
Example #7
    def _f6_number_of_pieces_threatened_by_black(self, board: Board) -> float:
        """Count the distinct pieces that black's next moves threaten."""
        threatened = dict()
        boards = []
        Moves.get_all_next_boards(board, 'black', boards, threatened)
        return len(threatened)
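
Collecting the threatened squares in a dict means a piece attacked from two directions is still counted once. Illustrative only:

threatened = {}
for square in [(2, 3), (4, 5), (2, 3)]:  # (2, 3) is attacked twice
    threatened[square] = True
print(len(threatened))  # -> 2, not 3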
Example #8
def train(agent: Agent,
          num_of_games: int,
          initial_game: tuple = None,
          explore_probability: int = 10,
          output: bool = False,
          results_output: bool = True,
          plots: bool = True) -> tuple:
    """Train the agent by making it plays games against its self.

    Parameters
    ----------
    agent : Agent
        The agent to train.
    num_of_games : int
        number of training games.
    initial_game : tuple
        a starting game
            first element is an initial board.
            second element is turn's number.
            third element is draw counter.
        the default is None.
    explore_probability : int, optional
        controls how often the agent plays a random exploratory move
        instead of the best move it has learnt so far.
        the exploration probability per move is 1/explore_probability.
        if it is None, no exploration happens.
        the default is 10 (an exploration probability of 0.1).
    output: bool, optional
        indicates if you want to print the game or not.
        the default is False.
    results_output: bool, optional
        indicates if you want to print the game's results or not.
        the default is True.
    plots: bool, optional
        indicates if you want to print the plots for errors
        in prediction or not.
        the default is True.

    Returns
    -------
    tuple
        first element, list:
            the i-th element is the prediction error on the i-th game,
            computed before learning from that game.
        second element, list:
            the i-th element is white's result in the i-th game.

    """
    costs = []
    cli = CLI()  # object to use the console interface if output is True.
    # dictionary for convert status into numbers.
    d = {'win': 1, 'lose': -1, 'draw': 0}
    # dictionary for knowing the colour of the current player
    # 1 is the first player
    # 0 is the second player
    colour = {1: 'white', 0: 'black'}
    # used to restore the agent's colour after training ends;
    # that's because the evaluation is updated from white's perspective.
    previous_colour = agent.get_colour()
    results = []  # store the results of the games.
    # the agent plays a random move with probability 1/prob.
    prob = explore_probability
    data_set = {'board': [], 'f_status': []}
    for i in range(num_of_games):
        boards = []  # list of white board positions through the game.
        if initial_game is None:
            # generate the initial board
            current_board = initial_board_generator()
            start_turn = 1
            draw_counter = 0
        else:
            current_board, start_turn, draw_counter = initial_game
        # 2-tuple
        # first element is the status (i.e. 'win', 'lose', or 'draw').
        # second element is the colour of the player with the final status.
        final_status = (None, None)

        for turn in range(start_turn, 10000):
            # alternate the roles of the same agent by setting its colour
            # to the colour of the player who should move in the current turn
            agent.set_colour(colour[turn % 2])
            # store the board when it's white's turn.
            if colour[turn % 2] == 'white':
                boards.append(current_board)
            # get the status of the game; None indicates the game is still in progress.
            status = current_board.get_status(colour[turn % 2], draw_counter)

            # print the turn number and the board if we need them.
            if output is True:
                print(f'turn: {turn}')
                cli.show(current_board)

            # if the game ends set the right values to final_status
            # then exit the game
            if status is not None:
                final_status = (status, colour[turn % 2])
                break

            # number of disks before the move:
            size_before = current_board.get_number_of_disks(None)

            # get the next board:
            # with probability 1/prob play a random exploratory move,
            # otherwise let the agent choose the move.
            explore = prob is not None and random.randint(1, prob) == 1
            if explore:
                next_boards = []
                Moves.get_all_next_boards(current_board, colour[turn % 2],
                                          next_boards)
                current_board = random.choice(next_boards)
            else:
                current_board = agent.choose_board(current_board, turn,
                                                   draw_counter)

            # number of disks after the move:
            size_after = current_board.get_number_of_disks(None)

            draw_counter = update_draw_counter(draw_counter, size_before,
                                               size_after)

        # get the final status for white
        if final_status[1] == 'black' and final_status[0] != 'draw':
            final_status = ('win' if final_status[0] == 'lose' else 'lose',
                            final_status[1])
        # add the cost before updating the parameters.
        costs.append(agent.get_system().compute_error(boards, final_status[0]))
        # learning is deferred: the game is stored here and the agent
        # learns from a balanced sample of games after the loop.
        # add the result of the game for white player
        results.append(d[final_status[0]])
        data_set['board'].append(boards)
        data_set['f_status'].append(final_status[0])

        if (i + 1) % 100 == 0:
            print(f'{i+1} games finished until now!')
    # reset the colour of the agent
    agent.set_colour(previous_colour)
    # let the agent learn from the previous games, balancing the number
    # of 'win' and 'lose' games so neither outcome dominates the update.
    m = min(results.count(-1), results.count(1))
    lose_m = m
    win_m = m
    for i in range(num_of_games):
        bs = data_set['board'][i]
        fs = data_set['f_status'][i]
        if fs == 'win' and win_m > 0:
            agent.learn(bs, fs)
            win_m -= 1
        elif fs == 'lose' and lose_m > 0:
            agent.learn(bs, fs)
            lose_m -= 1
        elif fs == 'draw':
            agent.learn(bs, fs)
    if results_output is True:
        wins = results.count(1)
        losses = results.count(-1)
        draws = results.count(0)
        print(f'training results of agent {agent.get_name()}')
        print(f'wins: {wins}')
        print(f'losses: {losses}')
        print(f'draws: {draws}')
        print('##################################')
        print(f'cost: {costs[-1]}')
    if plots is True:
        step = 2  # plot every second cost to keep the figure readable.
        plt.plot(np.arange(0, num_of_games, step), costs[::step], 'k')
        plt.show()
    return costs, results
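
The exploration step in train reduces to a single draw: with explore_probability = 10 the agent plays a random move with chance 1/10. A hedged restatement of that rule (the helper name should_explore is made up for illustration):

import random
from typing import Optional

def should_explore(explore_probability: Optional[int]) -> bool:
    # True with probability 1/explore_probability; None disables exploration.
    if explore_probability is None:
        return False
    return random.randint(1, explore_probability) == 1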