Example 1
def generate_move_mcts(
    board: Board, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search, with
    alpha-beta pruning.

    :param board: 2d array representing current state of the game
    :param player: the player who made the last move (active player)
    :param saved_state: ???

    :return: the agent's selected move
    """

    # TODO: return chosen action subtree using saved_state, to improve
    #  performance
    # Calculate the board shape
    bd_shp = board.shape
    # If the board is empty, play in the center column
    if np.all(board == NO_PLAYER):
        action = np.floor(np.median(np.arange(bd_shp[1])))
        return PlayerAction(action), saved_state

    # Convert the board to bitmaps and define the max_player board
    max_board, mask_board = board_to_bitmap(board, player)
    # Create a root node
    root_mcts = Connect4Node(max_board, mask_board, bd_shp, -1, True)
    # Call MCTS
    action = mcts(root_mcts)

    return PlayerAction(action), saved_state
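
For context, every agent in this listing shares the same generate-move signature. Below is a minimal driver sketch, assuming the initialize_game_state and apply_player_action helpers used by the tests later in this listing (their exact signatures vary between the repositories shown here):

board = initialize_game_state()
action, saved_state = generate_move_mcts(board, PLAYER1, None)
# Where the helper supports a copy flag, copying leaves the original board intact
board = apply_player_action(board, action, PLAYER1, True)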
Example 2
def test_agents():
    """ Test that the agents minimax and MCTS take immediate wins and block immediate losses"""

    empty_board = initialize_game_state()
    n_rows, n_cols = empty_board.shape
    for player in players:
        opponent = PLAYER1 if player == PLAYER2 else PLAYER2
        # Test for immediate wins (p=player) and immediate losses (p=opponent)
        for p in (player, opponent):
            board_col = empty_board.copy()
            board_row = empty_board.copy()
            # Check for win and loss in a row and column
            for i in range(CONNECT_N - 1):
                board_row = apply_player_action(board_row, PlayerAction(i), p)
                board_col = apply_player_action(board_col,
                                                PlayerAction(CONNECT_N - 1), p)
            # Check that both agents make the right move (always column CONNECT_N -1 = 3)
            for move_agent in move_agents:
                for board in [board_row, board_col]:
                    action = move_agent(board, player, None)[0]
                    assert action == PlayerAction(CONNECT_N - 1)

        # Test that the agent blocks a certain win by the opponent: two opponent
        # pieces side by side on the bottom row with open cells on both sides
        # create a certain win unless the player blocks on the left or the right
        board = empty_board.copy()
        board[-1, 1:3] = opponent
        for move_agent in move_agents:
            action = move_agent(board, player, None)[0]
            assert action == PlayerAction(0) or action == PlayerAction(3)
Example 3
def generate_move_alpha_beta(
    board: Board, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search, with
    alpha-beta pruning.

    :param board: 2d array representing current state of the game
    :param player: the player who made the last move (active player)
    :param saved_state: ???

    :return: the agent's selected move
    """

    # If the board is empty, play in the center column
    if np.all(board == NO_PLAYER):
        action = np.floor(np.median(np.arange(board.shape[1])))
        return PlayerAction(action), saved_state

    # Convert the board to bitmaps and define the max_player board
    max_board, mask_board = board_to_bitmap(board, player)

    # Call alpha_beta
    alpha0 = -100000
    beta0 = 100000
    score, action = alpha_beta(max_board, mask_board, True, 0, alpha0, beta0,
                               board.shape)

    return PlayerAction(action), saved_state
Example 4
def user_move(board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState]):
    action = PlayerAction(-1)
    while not 0 <= action < board.shape[1]:
        try:
            action = PlayerAction(input("Column? "))
        except ValueError:
            print("Input could not be converted to the dtype PlayerAction, try entering an integer.")
    return action, saved_state
Example 5
def user_move(board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState]):
    action = PlayerAction(-1)
    while not 0 <= action < board.shape[1]:
        try:
            action = PlayerAction(input("Column? "))
        except ValueError:
            # Ignore input that cannot be converted and prompt again
            pass
    return action, saved_state
Example 6
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    # Choose a valid, non-full column randomly and return it as `action`
    valid_columns = []
    for col in range(COLUMNS):
        # A column is playable while its topmost cell (row ROWS - 1 here) is empty
        if board[ROWS - 1][col] == 0:
            valid_columns.append(col)
    # random.choice returns a single column index; random.sample returns a
    # list, which would break the PlayerAction conversion. The move must be
    # chosen for every player, not only BoardPiece(2).
    action = PlayerAction(random.choice(valid_columns))
    return action, saved_state
Example 7
def ucb1_func(node: Node, c: float) -> Tuple[Node, PlayerAction]:
    """
    Returns the most urgent child to visit using the Upper Confidence Bound (UCB1)
    :param node:
        Node: current node in which to check for most urgent child
    :param c:
        float: exploration parameter

    :return:
        Tuple[Node, PlayerAction]: most urgent child to visit and the action
        leading to it
    """
    # Initialize variables
    child_action_key, urgent_child = None, None
    ucb1_max = -np.inf

    # Use UCB1 to select the next node to visit
    for a, child in node.children.items():
        ucb1 = (child.r / child.n) + c * ((
            (2 * log(node.n)) / child.n)**(1 / 2))

        # Select node with highest UCB1
        if ucb1 >= ucb1_max:
            ucb1_max = ucb1  # update max UCB1
            urgent_child = child
            child_action_key = a

    return urgent_child, PlayerAction(child_action_key)
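
In the notation of the code above (child reward r = child.r, child visits n = child.n, parent visits N = node.n), the quantity computed in the loop is the standard UCB1 score,

    \mathrm{UCB1} = \frac{r}{n} + c\,\sqrt{\frac{2\ln N}{n}},

where the first term rewards children with a high average payoff (exploitation), the second term grows for rarely visited children (exploration), and c trades the two off.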
Example 8
def generate_move_random(board: np.ndarray, player: BoardPiece,
                         saved_state: Optional[SavedState]):
    """Random getting a board and the corresponding player turn and returning
          a non-full column.

       Yields a non-full column action to be performed considering the board state.
       Args:
           board: Current state of the board
           player: Whose turn is it.
           saved_state: Optimal pre-computation work performed in previous steps.

       Returns:
           action: Column to use.
           saved_state: Not-yet implemented, but needed for the main.py algorithm.

       """

    exit_yes = False
    old_board = board.copy()
    action = np.array([0])

    while not exit_yes:
        # Draw a random column; retry if the chosen column turned out to be full
        action = PlayerAction(np.random.randint(board.shape[1]))
        old_board, position = apply_player_action(old_board, action, player,
                                                  True, True)
        if position != 0:
            exit_yes = True

    return action, saved_state
Example 9
def ucb1_func(node: Node, c: float) -> Tuple[Node, PlayerAction]:
    """
    Returns the most urgent child to visit using the Upper Confidence Bound (UCB1)
    :param node:
        Node: current node in which to check for most urgent child
    :param c:
        float: exploration parameter

    :return:
        Tuple[Node, PlayerAction]: most urgent child to visit and the action
        leading to it
    """
    # Use UCB1 to select the next node to visit
    child_action_key = None
    ucb1_max = -np.inf
    for a, child in node.children.items():
        ucb1 = (child.r / child.n) + c * np.sqrt(
            (2 * np.log(node.n)) / child.n)

        # Select node with highest UCB1
        if ucb1 >= ucb1_max:
            ucb1_max = ucb1  # update max UCB1
            child_action_key = a

    return node.children[child_action_key], PlayerAction(child_action_key)
Example 10
def alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
               alpha: float, beta: float,
               maximizingPlayer: bool) -> Tuple[PlayerAction, float]:
    # Determine the valid, non-full columns (a zero in the top row of the array)
    valid_columns = np.where(board[-1, :] == 0)[0]
    opp_player = PLAYER2 if player == PLAYER1 else PLAYER1
    game_state = check_end_state(board,
                                 opp_player if maximizingPlayer else player)
    if depth == 0 or game_state in (GameState.IS_DRAW, GameState.IS_WIN):
        if game_state == GameState.IS_WIN:
            if maximizingPlayer:
                return PlayerAction(-1), -1000000000000
            else:
                return PlayerAction(-1), 1000000000000

        elif game_state == GameState.IS_DRAW:
            return PlayerAction(np.random.choice(valid_columns)), 0
        else:  # depth == 0: evaluate the position heuristically
            return (PlayerAction(np.random.choice(valid_columns)),
                    score_position(board, player))

    if maximizingPlayer:
        value = -math.inf
        column = np.random.choice(valid_columns)
        for col in valid_columns:
            new_board = apply_player_action(board, PlayerAction(col), player,
                                            True)
            new_score = alpha_beta(new_board, opp_player, depth - 1, alpha,
                                   beta, False)[1]
            if new_score > value:
                value = new_score
                column = col
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return PlayerAction(column), value

    else:  # Minimizing player
        value = math.inf
        column = np.random.choice(valid_columns)
        for col in valid_columns:
            new_board = apply_player_action(board, PlayerAction(col), player,
                                            True)
            new_score = alpha_beta(new_board, opp_player, depth - 1, alpha,
                                   beta, True)[1]
            if new_score < value:
                value = new_score
                column = col
            beta = min(beta, value)
            if alpha >= beta:
                break
        return PlayerAction(column), value
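
A minimal usage sketch for the function above, assuming the helpers it already relies on; the depth of 5 and the full (-inf, inf) starting window are illustrative choices, not prescribed ones:

board = initialize_game_state()
# Search 5 plies with PLAYER1 to move as the maximizing player; the unbounded
# window places no pruning constraints on the root.
action, score = alpha_beta(board, PLAYER1, 5, -math.inf, math.inf, True)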
Example 11
    def test_connected_four_horizontal(self):
        c4_yes = common.initialize_game_state()
        common.apply_player_action(c4_yes, PlayerAction(0), common.PLAYER1)
        common.apply_player_action(c4_yes, PlayerAction(1), common.PLAYER1)
        common.apply_player_action(c4_yes, PlayerAction(2), common.PLAYER1)
        common.apply_player_action(c4_yes, PlayerAction(3), common.PLAYER1)

        c4_no = common.initialize_game_state()
        common.apply_player_action(c4_no, PlayerAction(0), common.PLAYER1)
        common.apply_player_action(c4_no, PlayerAction(1), common.PLAYER1)
        common.apply_player_action(c4_no, PlayerAction(2), common.PLAYER2)
        common.apply_player_action(c4_no, PlayerAction(3), common.PLAYER1)

        assert common.connected_four(c4_yes, PLAYER1)
        assert common.connected_four(c4_yes, PLAYER1, PlayerAction(3))
        assert not common.connected_four(c4_no, PLAYER1)
        assert not common.connected_four(c4_no, PLAYER1, PlayerAction(3))
Example 12
def test_apply_player_action():
    from agents.common import apply_player_action, PlayerAction

    board = np.zeros((6, 7), dtype=BoardPiece)
    action = PlayerAction(2)
    player = BoardPiece(2)
    copy = True
    ret = apply_player_action(board, action, player, copy)
    assert isinstance(ret, np.ndarray)
Example 13
def test_apply_player_action():
    from agents.common import apply_player_action
    from agents.common import initialize_game_state

    dummy_board = initialize_game_state()
    dummy_board[0, 0] = PLAYER1

    test_board = initialize_game_state()
    # with copying
    copied_test_board = apply_player_action(test_board,
                                            PlayerAction(0),
                                            PLAYER1,
                                            copy=True)
    # without copying
    apply_player_action(test_board, PlayerAction(0), PLAYER1)

    assert (copied_test_board == dummy_board).all()
    assert (test_board == dummy_board).all()
Example 14
def user_move(board: np.ndarray, _player: BoardPiece,
              saved_state: Optional[SavedState], args):
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param _player: Player ID of the user
    :param saved_state: not used in this implementation of the user move generation
    :param args: Optional parameter
    :return: Column in which the user wants to drop his piece
    """
    action = PlayerAction(-1)
    move_worked = None
    # Make sure that a column is selected which is in the range of the board and is not already full
    while not 0 <= action < board.shape[1] or move_worked is None:
        try:
            action = PlayerAction(input("Column? "))
            # apply_player_action is expected to raise on a full or invalid column
            move_worked = apply_player_action(board, action, _player)
        except Exception:
            # Invalid input or an illegal move: prompt again
            pass
    return action, SavedState()
Example 15
def test_apply_player_action():
    from agents.common import initialize_game_state, apply_player_action

    action = PlayerAction(3)
    player = PLAYER1
    board = b1
    board_after_action = apply_player_action(board, action, player)
    assert board_after_action.shape == board.shape
    assert (board_after_action == PLAYER1).any()
    assert (board_after_action[:, action] == PLAYER1).any()
Example 16
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    alpha = -math.inf
    beta = math.inf
    depth = 4

    # Choose a valid, non-full column that maximizes score and return it as `action`
    action = minimax(board, depth, alpha, beta, player, True)[0]

    return PlayerAction(action), saved_state
Example 17
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    # Choose a valid, non-full column randomly and return it as `action`
    # The last row of the array is the top of the board; a zero there means
    # the column is not yet full
    top_row = board[-1, :]
    valid_columns = [idx for idx, cell in enumerate(top_row) if cell == 0]
    action = PlayerAction(random.choice(valid_columns))
    return action, saved_state
Example 18
def test_random():
    from agents.agents_random.random import generate_move_random
    board = np.array([[1, 2, 2, 0, 1, 2, 2],
                      [2, 1, 1, 2, 1, 2, 2],
                      [2, 2, 1, 1, 1, 2, 2],
                      [2, 1, 2, 2, 2, 1, 1],
                      [1, 2, 1, 1, 1, 2, 2],
                      [1, 1, 2, 1, 2, 1, 2]])
    action, saved_state = generate_move_random(board, BoardPiece(1), saved_state=None)

    assert isinstance(action, PlayerAction)
    assert action == PlayerAction(3)  # Column 3 is the only non-full one
Example 19
def generate_move_random(
    board: np.ndarray,
    _player: BoardPiece,
    saved_state: Optional[SavedState] = None
) -> Tuple[PlayerAction, SavedState]:
    # Choose a valid, non-full column randomly and return it as `action`

    valid_columns = np.where(board[-1, :] == 0)[0]
    action = PlayerAction(np.random.choice(valid_columns))

    return action, saved_state
Example 20
def generate_move_random(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState], args=None) \
        -> Tuple[PlayerAction, SavedState]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID of random agent
    :param saved_state: Not used in this implementation of the random move generation
    :param args: Optional parameter
    :return: Column in which player wants to make his move (chosen randomly)
    """
    # Get column indexes where there is no player and choose one empty column randomly
    action = np.random.choice(np.unique(np.where(board == NO_PLAYER)[1]))
    return PlayerAction(action), SavedState()
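
For clarity, the np.where/np.unique idiom above collects every column that still contains an empty cell. A toy sketch, assuming NO_PLAYER == 0:

import numpy as np

toy = np.array([[1, 0, 2],
                [0, 0, 1]])
# np.where returns (row_indices, col_indices); keep the columns and deduplicate
cols = np.unique(np.where(toy == 0)[1])  # -> array([0, 1])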
Example 21
def test_apply_player_action_fail():
    """Test that an error is raised if an action in a full or not existent column is applied"""

    # Test for the insertion in a already full column
    full_board = initialize_game_state()
    full_board[:] = PLAYER1  # Fill the board completely with one player
    n_cols = full_board.shape[1]
    # Check that the exception is raised in every column
    for i in range(n_cols):
        with pytest.raises(Exception,
                           match="Tried to apply an action in a non existent "
                                 "or full column"):
            apply_player_action(full_board, PlayerAction(i), PLAYER1)
    # Test for a non-existent column
    with pytest.raises(Exception,
                       match="Tried to apply an action in a non existent "
                             "or full column"):
        apply_player_action(initialize_game_state(), PlayerAction(100),
                            PLAYER1)
Example 22
def generate_move_minimax(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState],
                          depth: int = 4) -> Tuple[PlayerAction, SavedState]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID
    :param saved_state: Not used in this implementation of the minimax move generation
    :param depth: Depth of the minimax agent / how many steps should be searched ahead
    :return: Column in which player wants to make his move (chosen using the minimax algorithm)
    """
    # If the minimax agent can make the first move, make sure it is always in the middle (position 3)
    if not board.any():
        return PlayerAction(3), SavedState()

    # Create a list that holds the player first, and the opponent second
    players = [PLAYER1, PLAYER2]
    players.remove(player)
    ordered_players = [player] + players

    # Determine the best action using a minimax algorithm with alpha-beta pruning,
    # which searches `depth` steps ahead (4 by default, i.e. two for each player)
    _, action = minimax(board, -np.inf, np.inf, ordered_players, depth, True)
    return PlayerAction(action), SavedState()
Example 23
def generate_move_MCTS(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState],
                       max_time: float = 5) \
        -> Tuple[PlayerAction, SavedState]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID
    :param saved_state: Not used in this implementation of the move generation
    :param max_time: Time in seconds given to the MCTS agent to find the next action
    :return: Column in which player wants to make his move (chosen using MCTS)
    """
    # Give the agent max_time seconds to find a good action
    action = MCTS(board, player, max_time)
    return PlayerAction(action), SavedState()
Example 24
def generate_move_minimax(
        board: np.ndarray, _player: BoardPiece,
        saved_state: Optional[SavedState]) -> Tuple[PlayerAction, SavedState]:
    # Choose the best column via an alpha-beta minimax search and return it as `action`
    depth = 4
    alpha = -math.inf
    beta = math.inf
    maximizingPlayer = True

    action = alpha_beta(board, _player, depth, alpha, beta,
                        maximizingPlayer)[0]

    return PlayerAction(action), saved_state
Example 25
    def buildGameStateFromID(self):
        """
        plays 4 moves on self.board if they're possible
        """
        # positionID, e.g. "4256" -> 1st move: column 4, 2nd move: column 2, ...
        player = self.player
        for col in self.positionID:
            action = PlayerAction(int(col))
            if move_is_possible(self.board, action):
                apply_player_action(self.board, action, player)
                player = other_player(player)
            else:
                self.status = False
                break
Example 26
def test_apply_player_action():
    from agents.common import apply_player_action, initialize_game_state

    board = initialize_game_state()
    board[5, 0] = PLAYER2
    board[5, 1] = PLAYER1
    board[5, 2] = PLAYER2
    board[5, 3] = PLAYER1
    board[5, 4] = PLAYER1
    board[5, 5] = PLAYER1

    copy_board = board.copy()
    old_board, position = apply_player_action(board, PlayerAction(3), PLAYER1,
                                              True, True)

    assert (old_board == copy_board).all()
    assert position == (4, 3)
    assert board[position] == PLAYER1

    board[:, 0] = PLAYER1
    position2 = apply_player_action(board, PlayerAction(0), PLAYER1, False,
                                    True)
    assert position2 == 0  # Return 0 if full column.
Example 27
def generate_move_MCTS(board: np.ndarray, player: BoardPiece,
                       saved_state: Optional[SavedState]) \
        -> Tuple[PlayerAction, SavedState]:

    global PLAYER
    global OPPONENT

    PLAYER = player
    if PLAYER == PLAYER1:
        OPPONENT = PLAYER2
    else:
        OPPONENT = PLAYER1

    action = MCTS(board)
    return PlayerAction(action), SavedState()
Example 28
    def traverse(self):
        """ Searches the tree until a node with unexpanded children is found

        This function is called recursively during the selection phase of MCTS.
        Recursion ceases once it reaches a node with unexpanded children. At
        this point, a new child is created from the node's list of actions, and
        the remainder of the game is simulated. The stats are then updated and
        propagated up to the root node, which made the original call.

        Returns
            True/False if the max player won/lost the simulated game, or
            -1 if it ended in a draw
        """

        # Check whether the current node is a terminal state
        if self.state == GameState.IS_WIN:
            if self.max_player:
                return True
            else:
                return False
        elif self.state == GameState.IS_DRAW:
            return -1

        # If any children are unexpanded, expand them and run a simulation
        if len(self.children) < len(self.actions):
            # Select the next randomized action in the list
            action = PlayerAction(self.actions[len(self.children)])
            # Apply the action to the current board
            child_bd, child_msk = apply_action_cp(self.board, self.mask,
                                                  action, self.shape)
            # Add the new child to the node
            new_child = Connect4Node(child_bd, child_msk, self.shape, action,
                                     not self.max_player)
            # If the game does not end, continue building the tree
            self.add_child(new_child)
            # Simulate the game to completion
            max_win = new_child.sim_game()
            # Update the child's stats
            new_child.update_stats(max_win)
        # Else, continue tree traversal
        else:
            next_node_ind = self.ucb1_select()
            next_child = self.children[next_node_ind]
            max_win = next_child.traverse()

        # Update this node's stats based on the result of the simulation
        self.update_stats(max_win)

        return max_win
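
A hypothetical sketch of the top-level loop that would drive traverse; the attribute names children and n follow the node code above, and the per-node action attribute is assumed from the constructor calls, so the real mcts() invoked in Example 1 may differ:

def mcts(root, n_iterations=1000):
    for _ in range(n_iterations):
        root.traverse()  # one select/expand/simulate/backpropagate pass
    # Play the most-visited child's action, a common and robust final-move rule
    best = max(root.children, key=lambda child: child.n)
    return best.action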
Example 29
def test_apply_player_action_success():
    """Test for successful application of actions"""

    # Test if application of action (drop of the board piece) is possible for every cell and player
    for player in players:
        board = initialize_game_state()
        n_rows = board.shape[0]
        for action, column in enumerate(board.T):
            for i in range(n_rows):
                board = apply_player_action(board=board,
                                            action=PlayerAction(action),
                                            player=player)
                assert isinstance(board, np.ndarray)
                assert board.dtype == np.int8
                assert board.shape == (6, 7)
                assert not np.all(board == 0)
                assert board[n_rows - 1 - i, action] == player
Example 30
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """

    :param board:   np.ndarray
                    Contains current state of the board an ndarray, shape (ROWS, COLUMNS) and data type (dtype) BoardPiece
    :param player:  BoardPiece
                    Current player playing the game
    :param saved_state: Saved state of the game
    :return: action:    PlayerAction (np.int8)
                        The column to be played
            saved_state: The saved state of the game

    """
    col_, val = minimax(4, board, player, math.inf, -math.inf, True)
    action = PlayerAction(int(col_))
    return action, saved_state