Example No. 1
def test_check_end_state():
    """Check that check_end_state reports draws and wins correctly."""
    board = cm.initialize_game_state()

    # Fill the whole board with an alternating pattern that contains
    # no four-in-a-row for either player, i.e. a drawn position.
    board[:3, ::2] = cm.PLAYER1
    board[3:, 1::2] = cm.PLAYER1
    board[:3, 1::2] = cm.PLAYER2
    board[3:, ::2] = cm.PLAYER2
    print('')
    print(cm.pretty_print_board(board))
    assert cm.check_end_state(board, cm.PLAYER1) == cm.GameState.IS_DRAW

    # Overwrite a diagonal with PLAYER1 pieces to create a winning line.
    for row, col in ((0, 1), (1, 2), (2, 3), (3, 4)):
        board[row, col] = cm.PLAYER1
    assert cm.check_end_state(board, cm.PLAYER1) == cm.GameState.IS_WIN

    # Emptying an unrelated corner cell must not change the win verdict.
    board[-1, -1] = cm.NO_PLAYER
    assert cm.check_end_state(board, cm.PLAYER1) == cm.GameState.IS_WIN
Example No. 2
def human_vs_agent(generate_move_1: GenMove,
                   generate_move_2: GenMove,
                   player_1: str = "Player 1",
                   player_2: str = "Player 2",
                   args_1: tuple = (),
                   args_2: tuple = (),
                   init_1: Callable = lambda board, player: None,
                   init_2: Callable = lambda board, player: None):
    """
    Play two games between two move generators, swapping who goes first.

    :param generate_move_1: move generator for player 1
    :param generate_move_2: move generator for player 2
    :param player_1: display name for player 1
    :param player_2: display name for player 2
    :param args_1: extra positional args passed to generate_move_1
    :param args_2: extra positional args passed to generate_move_2
    :param init_1: warm-up callable for player 1 (called once on a fresh
                   board, e.g. to trigger numba JIT compilation)
    :param init_2: warm-up callable for player 2

    :return: player 1's score over the two games (1 per win, 0.5 per draw)
    """
    import time
    from agents.common_arrays import PLAYER1, PLAYER2, GameState
    from agents.common_arrays import initialize_game_state, pretty_print_board, \
        apply_player_action, check_end_state

    players = (PLAYER1, PLAYER2)

    # Play two games, where each player gets a chance to go first
    p1_wins = 0
    for play_first in (1, -1):
        # Initialize strings to store the move history
        game_moves_out = ''
        game_moves = ''
        # This loop initializes the variables to speed up computation when
        # using the numba compiler
        for init, player in zip((init_1, init_2)[::play_first], players):
            init(initialize_game_state(), player)

        saved_state = {PLAYER1: None, PLAYER2: None}
        board = initialize_game_state()
        # Reversing with [::play_first] swaps the pairing on the second
        # game so each agent moves first exactly once
        gen_moves = (generate_move_1, generate_move_2)[::play_first]
        player_names = (player_1, player_2)[::play_first]
        gen_args = (args_1, args_2)[::play_first]

        playing = True
        while playing:
            for player, player_name, gen_move, args in zip(
                    players, player_names, gen_moves, gen_args):

                # Time how long a move takes
                t0 = time.time()

                # Generate an action, either through user input or by an
                # agent function; pass a copy so agents cannot mutate the
                # real board
                action, saved_state[player] = gen_move(board.copy(), player,
                                                       saved_state[player],
                                                       *args)

                # print(f"Move time: {time.time() - t0:.3f}s")

                # Save the move
                game_moves_out += str(action)
                game_moves += str(action)
                game_moves += ', '
                # Update the board with the action
                apply_player_action(board, action, player)
                end_state = check_end_state(board, player)

                # Check to see whether the game is a win or draw
                if end_state != GameState.STILL_PLAYING:
                    if end_state == GameState.IS_DRAW:
                        print("Game ended in draw")
                        print(game_moves)
                        p1_wins += 0.5
                    else:
                        print(f'{player_name} won playing '
                              f'{"X" if player == PLAYER1 else "O"}')
                        print(game_moves)
                        # BUG FIX: compare against the configured name
                        # instead of the hard-coded literal 'Player 1', so
                        # wins are still counted when callers rename the
                        # players (behavior is unchanged for the defaults)
                        if player_name == player_1:
                            p1_wins += 1

                    playing = False
                    break
    return p1_wins
Example No. 3
def alpha_beta(board: Board, player: BoardPiece, max_player: bool, depth: int,
               alpha: GameScore,
               beta: GameScore) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively call alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, calculate and
    return the heuristic score. Scores farther down the tree are penalized.

    Shortcuts are built in to:
    1. Automatically take a win
    2. Automatically block a loss
    3. Return a large score for a win at any depth

    :param board: 2d array representing current state of the game
    :param player: the player who made the last move (active player)
    :param max_player: boolean indicating whether the depth at which alpha_beta
                       is called from is a maximizing or minimizing player
    :param depth: the current depth in the game tree
    :param alpha: the currently best score for the maximizing player along the
                  path to root
    :param beta: the currently best score for the minimizing player along the
                  path to root

    :return: the best action and the associated score
    """
    # Make a list of columns that can be played in: a column is open when
    # its cell in row -1 is still 0 (assumes row -1 is the top of the
    # board — TODO confirm orientation against apply_player_action)
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node calculate the score
    max_depth = 4   # fixed search horizon for this variant
    win_score = 150  # magnitude returned for a detected win
    state_p = check_end_state(board, player)
    # state_np = check_end_state(board, BoardPiece(player % 2 + 1))
    if state_p == GameState.IS_WIN:
        # A win for `player` is good for the maximizer, bad for the minimizer
        if max_player:
            return GameScore(win_score), None
        else:
            return GameScore(-win_score), None
    # elif state_np == GameState.IS_WIN:
    #     if max_player:
    #         return GameScore(-win_score), None
    #     else:
    #         return GameScore(win_score), None
    elif state_p == GameState.IS_DRAW:
        # Drawn positions are worth nothing to either side
        return 0, None
    elif depth == max_depth:
        # Leaf node: fall back to the static evaluation function
        return heuristic_solver(board, player, max_player), None
        # return heuristic_solver_bits(board, player, max_player), None

    # # If this is the root call, check for wins and block/win, prioritize wins
    # win_score = 150
    # if depth == 0:
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col, player, True),
    #                         player, col):
    #             return GameScore(win_score), PlayerAction(col)
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col,
    #                         BoardPiece(player % 2 + 1), True),
    #                         BoardPiece(player % 2 + 1), col):
    #             return GameScore(win_score), PlayerAction(col)

    # For each potential action, call alpha_beta
    # NOTE(review): if potential_actions were empty, `action` below would be
    # unbound; presumably a full board is always caught as IS_DRAW above —
    # verify against check_end_state
    if max_player:
        # score = -np.inf
        score = -100000  # sentinel below any reachable score
        for col in potential_actions:
            # Apply the current action and call alpha_beta for the opponent
            # (player % 2 + 1 toggles between pieces 1 and 2)
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth + 1, alpha, beta)
            # Depth penalty: prefer outcomes reached sooner (5 per ply —
            # presumably tuned; confirm against heuristic_solver's scale)
            new_score -= 5 * depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether alpha updates the score
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        # score = np.inf
        score = 100000  # sentinel above any reachable score
        for col in potential_actions:
            # Apply the current action and call alpha_beta for the opponent
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth + 1, alpha, beta)
            # Depth penalty mirrored for the minimizing side
            new_score += 5 * depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether beta updates the score
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
Example No. 4
def alpha_beta(
    board: Board,
    player: BoardPiece,
    max_player: bool,
    depth: int,
    alpha: GameScore,
    beta: GameScore,
) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Alpha-beta pruned minimax search over the game tree to a fixed depth.

    Terminal positions (a win for either player, or a draw) short-circuit
    with a fixed score; positions at the depth limit are scored by
    heuristic_solver. Deeper results are penalized by 5 points per ply so
    faster wins are preferred.

    :param board: 2d array representing the current state of the game
    :param player: the piece evaluated at this node (its opponent is
                   player % 2 + 1)
    :param max_player: True when this node is a maximizing node
    :param depth: the current depth in the game tree (root is 0)
    :param alpha: the currently best score for the maximizing player along
                  the path to root
    :param beta: the currently best score for the minimizing player along
                 the path to root

    :return: the best score and the associated action (action is None at
             terminal/leaf nodes)
    """
    # Make a list of columns that can be played in: a column is open when
    # its cell in row -1 is still 0 (assumes row -1 is the top of the
    # board — TODO confirm orientation against apply_player_action)
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node calculate the score
    max_depth = 6    # deeper search horizon than the 4-ply variant
    win_score = 150  # magnitude returned for a detected win
    # Check the end state for both the node's player and its opponent
    state_p = check_end_state(board, player)
    state_np = check_end_state(board, BoardPiece(player % 2 + 1))
    # if depth == max_depth or np.all(board != 0):
    #     return heuristic_solver(board, player, max_player), None
    #     # return heuristic_solver_bits(board, player, max_player), None
    if state_p == GameState.IS_WIN:
        # A win for `player` is good for the maximizer, bad for the minimizer
        if max_player:
            return GameScore(win_score), None
        else:
            return GameScore(-win_score), None
    elif state_np == GameState.IS_WIN:
        # A win for the opponent scores with the opposite sign
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif depth == max_depth:
        # Leaf node: fall back to the static evaluation function
        return heuristic_solver(board, player, max_player), None
        # return heuristic_solver_bits(board, player, max_player), None
    elif state_p == GameState.IS_DRAW:
        # Drawn positions are worth nothing to either side
        return 0, None

    # # If this is the root call, check for wins and block/win, prioritize wins
    # win_score = 150
    # if depth == 0:
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col, player, True),
    #                         player, col):
    #             return GameScore(win_score), PlayerAction(col)
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col,
    #                                             BoardPiece(player % 2 + 1), True),
    #                         BoardPiece(player % 2 + 1), col):
    #             return GameScore(win_score), PlayerAction(col)

    # For each potential action, call alpha_beta
    # NOTE(review): if potential_actions were empty, `action` below would be
    # unbound; presumably a full board is always caught as a terminal state
    # above — verify against check_end_state
    if max_player:
        score = -100000  # sentinel below any reachable score
        for col in potential_actions:
            # Apply the current action and call alpha_beta for the opponent
            # (player % 2 + 1 toggles between pieces 1 and 2)
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth + 1, alpha, beta)
            # Depth penalty: prefer outcomes reached sooner
            new_score -= 5 * depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether alpha updates the score
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = 100000  # sentinel above any reachable score
        for col in potential_actions:
            # Apply the current action and call alpha_beta for the opponent
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth + 1, alpha, beta)
            # Depth penalty mirrored for the minimizing side
            new_score += 5 * depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether beta updates the score
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)