Example No. 1
def generate_move_random(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState], args=None) \
        -> Tuple[PlayerAction, SavedState]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID of random agent
    :param saved_state: Not used in this implementation of the random move generation
    :param args: Optional parameter
    :return: Column in which player wants to make his move (chosen randomly)
    """
    # Get column indexes where there is no player and choose one empty column randomly
    action = np.random.choice(np.unique(np.where(board == NO_PLAYER)[1]))
    return PlayerAction(action), SavedState()
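All of these examples rely on shared helper types (BoardPiece, PlayerAction, SavedState, NO_PLAYER, PLAYER1, PLAYER2) defined elsewhere in each repository. Below is a minimal sketch consistent with how they are used here, assuming the common numpy-based definitions; the real definitions may differ per repository:

import numpy as np
from typing import Optional, Tuple

BoardPiece = np.int8     # a board cell: 0 (empty), 1 or 2
PlayerAction = np.int8   # a column index
NO_PLAYER = BoardPiece(0)
PLAYER1 = BoardPiece(1)
PLAYER2 = BoardPiece(2)

class SavedState:
    # Placeholder for anything an agent wants to keep between moves
    pass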
Example No. 2
def test_apply_player_action_fail():
    """Test that an error is raised if an action is applied in a full or nonexistent column"""

    # Test insertion into an already full column
    full_board = initialize_game_state()
    full_board[:] = PLAYER1  # Fill the board completely with one player
    n_cols = full_board.shape[1]
    for i in range(n_cols):  # Check that the exception is raised for every column
        with pytest.raises(Exception) as e:
            apply_player_action(full_board, PlayerAction(i), PLAYER1)
        # Assertions inside the `with` block would never run, so check afterwards
        assert str(e.value) == "Tried to apply an action in a non existent or full column"

    # Test a nonexistent column
    with pytest.raises(Exception) as e:
        apply_player_action(initialize_game_state(), PlayerAction(100), PLAYER1)
    assert str(e.value) == "Tried to apply an action in a non existent or full column"
Example No. 3
def test_random():
    from agents.agents_random.random import generate_move_random
    board = np.array([[1, 2, 2, 0, 1, 2, 2],
                      [2, 1, 1, 2, 1, 2, 2],
                      [2, 2, 1, 1, 1, 2, 2],
                      [2, 1, 2, 2, 2, 1, 1],
                      [1, 2, 1, 1, 1, 2, 2],
                      [1, 1, 2, 1, 2, 1, 2]])
    action, saved_state = generate_move_random(board, BoardPiece(1), saved_state=None)

    assert isinstance(action, PlayerAction)
    assert action == PlayerAction(3)  # Column 3 is the only one with an empty cell
Example No. 4
def generate_move_random(
    board: np.ndarray,
    _player: BoardPiece,
    saved_state: Optional[SavedState] = None
) -> Tuple[PlayerAction, SavedState]:
    # Choose a valid, non-full column randomly and return it as `action`

    valid_columns = np.where(board[-1, :] == 0)[0]
    action = PlayerAction(np.random.choice(valid_columns))

    return action, saved_state
Example No. 5
def generate_move_MCTS(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState],
                       max_time: float = 5) \
        -> Tuple[PlayerAction, SavedState]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID
    :param saved_state: Not used in this implementation of the move generation
    :param max_time: Time in seconds given to the MCTS agent to find the next action
    :return: Column in which player wants to make his move (chosen using MCTS)
    """
    # Give the agent max_time seconds to find a good action
    action = MCTS(board, player, max_time)
    return PlayerAction(action), SavedState()
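MCTS(board, player, max_time) is not shown; it presumably runs search iterations until the time budget is spent. A self-contained sketch of that time-budget pattern, with the loop body standing in for one select/expand/simulate/backpropagate pass:

import time

def run_with_time_budget(max_time: float) -> int:
    # Run as many iterations as fit into max_time seconds
    deadline = time.monotonic() + max_time
    iterations = 0
    while time.monotonic() < deadline:
        iterations += 1  # stand-in for one full MCTS iteration
    return iterations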
Example No. 6
def generate_move_minimax(
        board: np.ndarray, _player: BoardPiece,
        saved_state: Optional[SavedState]) -> Tuple[PlayerAction, SavedState]:
    # Search the game tree with alpha-beta pruning and return the best column as `action`
    depth = 4
    alpha = -math.inf
    beta = math.inf
    maximizingPlayer = True

    action = alpha_beta(board, _player, depth, alpha, beta,
                        maximizingPlayer)[0]

    return PlayerAction(action), saved_state
Example No. 7
def generate_move_minimax(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState],
                          depth: int = 4) -> Tuple[PlayerAction, SavedState]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID
    :param saved_state: Not used in this implementation of the minimax move generation
    :param depth: Depth of the minimax agent / how many steps should be searched ahead
    :return: Column in which player wants to make his move (chosen using the minimax algorithm)
    """
    # If the minimax agent can make the first move, make sure it is always in the middle (position 3)
    if not board.any():
        return PlayerAction(3), SavedState()

    # Create a list that holds the player first, and the opponent second
    players = [PLAYER1, PLAYER2]
    players.remove(player)
    ordered_players = [player] + players

    # Determine the best action using a minimax algorithm with alpha-beta pruning which looks `depth` steps
    # ahead (by default 4, i.e. two for each player)
    _, action = minimax(board, -np.inf, np.inf, ordered_players, depth, True)
    return PlayerAction(action), SavedState()
Example No. 8
    def buildGameStateFromID(self):
        """
        plays 4 moves on self.board if they're possible
        """
        # positionID is a string of column indices, e.g. "4256" -> 1st move: column 4, 2nd move: column 2, ...
        player = self.player
        for col in self.positionID:
            action = PlayerAction(int(col))
            if move_is_possible(self.board, action):
                apply_player_action(self.board, action, player)
                player = other_player(player)
            else:
                self.status = False
                break
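The helpers move_is_possible and other_player are not shown; below are hypothetical sketches consistent with their use above, assuming row 0 is the top of the board:

def move_is_possible(board: np.ndarray, action: PlayerAction) -> bool:
    # A move is possible while the column's top cell is still empty
    return board[0, action] == NO_PLAYER

def other_player(player: BoardPiece) -> BoardPiece:
    # Hand the turn to the other player
    return PLAYER1 if player == PLAYER2 else PLAYER2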
Example No. 9
def test_apply_player_action():
    from agents.common import apply_player_action, initialize_game_state

    board = initialize_game_state()
    board[5, 0] = PLAYER2
    board[5, 1] = PLAYER1
    board[5, 2] = PLAYER2
    board[5, 3] = PLAYER1
    board[5, 4] = PLAYER1
    board[5, 5] = PLAYER1

    copy_board = board.copy()
    old_board, position = apply_player_action(board, PlayerAction(3), PLAYER1,
                                              True, True)

    assert np.array_equal(old_board, copy_board)  # the returned old board must equal the pre-action board
    assert position == (4, 3)
    assert board[position] == PLAYER1

    board[:, 0] = PLAYER1
    position2 = apply_player_action(board, PlayerAction(0), PLAYER1, False,
                                    True)
    assert position2 == 0  # Return 0 if full column.
Example No. 10
def generate_move_MCTS(board: np.ndarray, player: BoardPiece,
                       saved_state: Optional[SavedState]) \
        -> Tuple[PlayerAction, SavedState]:

    global PLAYER
    global OPPONENT

    PLAYER = player
    if PLAYER == PLAYER1:
        OPPONENT = PLAYER2
    else:
        OPPONENT = PLAYER1

    action = MCTS(board)
    return PlayerAction(action), SavedState()
Example No. 11
    def traverse(self):
        """ Searches the tree until a node with unexpanded children is found

        This function is called recursively during the selection phase of MCTS.
        Recursion ceases once it reaches a node with unexpanded children. At
        this point, a new child is created from the node's list of actions, and
        the remainder of the game is simulated. The stats are then updated and
        propagated up to the root node, which made the original call.

        Returns
            The result of the simulation, propagated up the tree: True or
            False depending on which player won, or -1 for a draw.
        """

        # Check whether the current node is a terminal state
        if self.state == GameState.IS_WIN:
            if self.max_player:
                return True
            else:
                return False
        elif self.state == GameState.IS_DRAW:
            return -1

        # If any children are unexpanded, expand them and run a simulation
        if len(self.children) < len(self.actions):
            # Select the next randomized action in the list
            action = PlayerAction(self.actions[len(self.children)])
            # Apply the action to the current board
            child_bd, child_msk = apply_action_cp(self.board, self.mask,
                                                  action, self.shape)
            # Create the new child node for the chosen action
            new_child = Connect4Node(child_bd, child_msk, self.shape, action,
                                     not self.max_player)
            # Add the new child to the node
            self.add_child(new_child)
            # Simulate the game to completion
            max_win = new_child.sim_game()
            # Update the child's stats
            new_child.update_stats(max_win)
        # Else, continue tree traversal
        else:
            next_node_ind = self.ucb1_select()
            next_child = self.children[next_node_ind]
            max_win = next_child.traverse()

        # Update this node's stats based on the result of the simulation
        self.update_stats(max_win)

        return max_win
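The ucb1_select call is not shown; it presumably implements the standard UCB1 rule, w_i / n_i + c * sqrt(ln N / n_i). A standalone sketch assuming each child's win and visit counts are available as arrays (that layout is illustrative, not the node's real API):

import numpy as np

def ucb1_select(wins: np.ndarray, visits: np.ndarray,
                c: float = np.sqrt(2)) -> int:
    # Exploitation (wins / visits) plus exploration (c * sqrt(ln N / visits));
    # unvisited children are forced to +inf so they are always tried first
    total_visits = visits.sum()
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = wins / visits + c * np.sqrt(np.log(total_visits) / visits)
    scores[visits == 0] = np.inf
    return int(np.argmax(scores))

# e.g. two visited children and one unvisited -> the unvisited one is chosen
assert ucb1_select(np.array([3.0, 1.0, 0.0]), np.array([5.0, 4.0, 0.0])) == 2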
Example No. 12
def test_apply_player_action_success():
    """Test for successful application of actions"""

    # Test if application of action (drop of the board piece) is possible for every cell and player
    for player in players:
        board = initialize_game_state()
        n_rows = board.shape[0]
        for action, column in enumerate(board.T):
            for i in range(n_rows):
                board = apply_player_action(board=board,
                                            action=PlayerAction(action),
                                            player=player)
                assert isinstance(board, np.ndarray)
                assert board.dtype == np.int8
                assert board.shape == (6, 7)
                assert not np.all(board == 0)
                assert board[n_rows - 1 - i, action] == player
Example No. 13
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """

    :param board:   np.ndarray
                    Contains current state of the board an ndarray, shape (ROWS, COLUMNS) and data type (dtype) BoardPiece
    :param player:  BoardPiece
                    Current player playing the game
    :param saved_state: Saved state of the game
    :return: action:    PlayerAction (np.int8)
                        The column to be played
            saved_state: The saved state of the game

    """
    col_, val = minimax(4, board, player, math.inf, -math.inf, True)
    action = PlayerAction(int(col_))
    return action, saved_state
Example No. 14
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
   Choose randomly an action based on available free columns
    :param board:
        np.ndarray: current state of the board, filled with Player pieces
    :param player:
        BoardPiece: standard generate_move input... not used in this case
    :param saved_state:
        SavedState: standard generate_move input... not used in this case
    :return:
        PlayerAction: random column to play
    """
    # Choose a valid, non-full column randomly and return it as `action`
    action = PlayerAction(
        np.random.choice(np.arange(board.shape[1])[board[-1, :] == NO_PLAYER]))

    return action, saved_state
Example No. 15
def generate_move(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState])\
        -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    generates an optimal move/action using the Monte Carlo Tree Search strategy
    :param board: current state of board
    :param player: player whose move is optimized
    :param saved_state: saved state of board
    :return: move, saved_state (optional)
    """

    # have numpy emit a RuntimeWarning on division by zero
    np.seterr(divide='warn')

    global PLAYER
    global OPPONENT

    # set player and opponent
    PLAYER = player
    OPPONENT = PLAYER1 if player == PLAYER2 else PLAYER2

    # if lowest board row is empty make first move in central col = 3
    if not board[0, :].any():
        action = 3

    else:
        # create root Node object
        root = Node(board_copy=deepcopy(board),
                    parent=None,
                    col=-1,
                    player=PLAYER)
        # create MCTS object for the player (this also starts its timer)
        mcts = MCTS(PLAYER)
        # call monte carlo tree search starting from root node
        action = mcts.monte_carlo_tree_search(root)

    # return optimal action for player
    return PlayerAction(action), saved_state
Example No. 16
    def compute_score(self):
        """
        uses heuristic to set score of node
        """
        # score the node after the last (4th) move encoded in positionID
        action = PlayerAction(int(self.positionID[3]))
        self.score = evaluate_heuristic(self.board, action, self.player)
Example No. 17
def alpha_beta(board: Bitmap, mask: Bitmap, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore,
               board_shp: Tuple) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively call alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, calculate and
    return the heuristic score. Scores farther down the tree are penalized.

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: boolean indicating whether the depth at which alpha_beta
                       is called from is a maximizing or minimizing player
    :param depth: the current depth in the game tree
    :param alpha: the currently best score for the maximizing player along the
                  path to root
    :param beta: the currently best score for the minimizing player along the
                  path to root
    :param board_shp: the shape of the game board

    :return: the best achievable score and the associated action
    """

    # If the node is at the max depth or at a terminal node, calculate the score
    max_depth = 7
    win_score = 150
    state_p = check_end_state(board ^ mask, mask, board_shp)
    if state_p == GameState.IS_WIN:
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif state_p == GameState.IS_DRAW:
        return 0, None
    elif depth == max_depth:
        return heuristic_solver_bits(board, mask, board_shp[0],
                                     max_player), None

    # For each potential action, call alpha_beta
    pot_actions = valid_actions(mask, board_shp)
    if max_player:
        score = -100000
        action = -1
        for col in pot_actions:
            # Apply the current action
            min_board, new_mask = apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, temp = alpha_beta(min_board, new_mask, False, depth + 1,
                                         alpha, beta, board_shp)
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = 100000
        action = -1
        for col in pot_actions:
            # Apply the current action
            max_board, new_mask = apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, temp = alpha_beta(max_board, new_mask, True, depth + 1,
                                         alpha, beta, board_shp)
            new_score += depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
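apply_action_cp and valid_actions operate on bitboards and are not shown. The sketches below assume the common Connect-Four layout of (rows + 1) bits per column with the lowest bit at the bottom of a column; the layout is an assumption, but it matches the board ^ mask perspective flip alpha_beta performs:

from typing import List, Tuple

Bitmap = int  # assumed alias; the original Bitmap type is not shown

def apply_action_cp(board: Bitmap, mask: Bitmap, col: int,
                    board_shp: Tuple[int, int]) -> Tuple[Bitmap, Bitmap]:
    # Adding the column's bottom bit to the mask carries into the lowest
    # empty cell; the returned board holds the *other* player's stones, so
    # the child position is already seen from the next player's perspective
    new_mask = mask | (mask + (1 << (col * (board_shp[0] + 1))))
    return board ^ mask, new_mask

def valid_actions(mask: Bitmap, board_shp: Tuple[int, int]) -> List[int]:
    # A column stays playable while its top cell bit is still 0 in the mask
    rows, cols = board_shp
    return [col for col in range(cols)
            if not mask & (1 << (col * (rows + 1) + rows - 1))]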
Example No. 18
def generate_move_mcts(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
                       ) -> Tuple[PlayerAction, Optional[SavedState]]:
    root = MonteCarloTreeSearchNode(state=board, player=player)
    best_node = root.best_simulated_action()
    action = PlayerAction(int(best_node.parent_action))
    return action, saved_state
Example No. 19
def minimax(board: np.ndarray, alpha: int, beta: int, players: List[BoardPiece], depth: int, MaxPlayer: bool) \
        -> Tuple[any, Union[PlayerAction, None]]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param alpha: the best value that maximizer can guarantee in the current state or before in the maximizer turn
    :param beta: the best value that minimizer can guarantee in the current state or before it in the minimizer turn
    :param players: List of players with maximizer first
    :param depth: Steps that should be evaluated
    :param MaxPlayer: Bool if it is the maximizers turn
    :return: Best value for maximizer or minimizer and the corresponding action
    """
    # Check the end state of the game after the last player's move
    end_state = check_end_state(board, players[0] if not MaxPlayer else players[1])
    # Return very positive/negative value if the move of the last player won the game
    if end_state == GameState.IS_WIN:
        if MaxPlayer:
            return -10**10, None
        else:
            return 10**10, None
    if end_state == GameState.IS_DRAW:
        return 0, None
    # Only evaluate the board if the game is still going on and the bottom of the tree is reached
    if end_state == GameState.STILL_PLAYING and depth == 0:
        # Evaluate how good the current board is for the maximizing player
        return eval_board(board, players), None

    if MaxPlayer:
        best_value = -np.inf
        player = players[0]
    else:
        best_value = np.inf
        player = players[1]

    # Get all the possible actions (columns that are not already full)
    free_columns = np.unique(np.where(board == NO_PLAYER)[1])
    # Shuffle the order of the actions so that, if more than one action has the same value,
    # a random one among them ends up being selected
    free_columns = np.random.permutation(free_columns)
    best_action = None
    for action in free_columns:
        # Apply the action and go one step deeper into the tree
        board_new = apply_player_action(board.copy(), PlayerAction(action), player)
        value, _ = minimax(board_new, alpha, beta, players, depth - 1, not MaxPlayer)
        # If the action results in a board that is better than all the previously checked actions
        # for the current player, save it and the corresponding evaluation of the board
        if MaxPlayer and value >= best_value:
            best_value = value
            best_action = action
            alpha = max(alpha, best_value)
            if beta <= alpha:
                break
        if not MaxPlayer and value <= best_value:
            best_value = value
            best_action = action
            beta = min(beta, best_value)
            if beta <= alpha:
                break

    return best_value, best_action
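eval_board is not shown. Here is a hypothetical stand-in that captures the usual idea (the real heuristic may differ): score every length-4 window that only one player occupies, positive for the maximizer players[0], negative for players[1]:

from typing import List
import numpy as np

def eval_board(board: np.ndarray, players: List[BoardPiece]) -> int:
    # Collect every horizontal, vertical and diagonal window of length 4
    windows = []
    rows, cols = board.shape
    for r in range(rows):
        for c in range(cols):
            if c + 4 <= cols:
                windows.append(board[r, c:c + 4])
            if r + 4 <= rows:
                windows.append(board[r:r + 4, c])
            if r + 4 <= rows and c + 4 <= cols:
                square = board[r:r + 4, c:c + 4]
                windows.append(square.diagonal())
                windows.append(np.fliplr(square).diagonal())
    # A window only counts for a player while the opponent is absent from it
    weights = {0: 0, 1: 1, 2: 10, 3: 100, 4: 10000}
    score = 0
    for w in windows:
        n_max = int(np.count_nonzero(w == players[0]))
        n_min = int(np.count_nonzero(w == players[1]))
        if n_min == 0:
            score += weights[n_max]
        elif n_max == 0:
            score -= weights[n_min]
    return score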
Example No. 20
         "|              |\n" + \
         "|    X X       |\n" + \
         "|    O X X     |\n" + \
         "|  O X O O     |\n" + \
         "|  O O X X     |\n" + \
         "|==============|\n" + \
         "|0 1 2 3 4 5 6 |"
boards: List[str] = [board1, board2]

# np.ndarray representations of initial states
game_state1 = initialize_game_state()
game_state2 = string_to_board(board2)
game_states: List[np.ndarray] = [game_state1, game_state2]

# list of actions and players to test on initial states
actions = [PlayerAction(0), PlayerAction(3), PlayerAction(6)]
players = [PLAYER1, PLAYER2]

# game state 1 with all possible player/action combinations
game_state1_with_action1_player1 = game_state1.copy()
game_state1_with_action1_player1[5][0] = PLAYER1
game_state1_with_action2_player1 = game_state1.copy()
game_state1_with_action2_player1[5][3] = PLAYER1
game_state1_with_action3_player1 = game_state1.copy()
game_state1_with_action3_player1[5][6] = PLAYER1
game_state1_with_action1_player2 = game_state1.copy()
game_state1_with_action1_player2[5][0] = PLAYER2
game_state1_with_action2_player2 = game_state1.copy()
game_state1_with_action2_player2[5][3] = PLAYER2
game_state1_with_action3_player2 = game_state1.copy()
game_state1_with_action3_player2[5][6] = PLAYER2
Example No. 21
def minimax_action(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]):
    """Minimax agent getting a board and the corresponding player turn and returning
       the best non-full column for the player according to the algorithm.

    Enter the current state of the board, and performs a top-bottom search on different positions
    of the board, so that the most optimal according the heuristics used is found.

    Args:
        board: Current state of the board
        player: Whose turn is it.
        saved_state: Pre-computation work

    Returns:
        action: Best column to use.
        saved_state_out: Tree structure
    """
    global BOARD_VALUES

    tree = minmax_tree()  # Weights tree initialization.
    other_player = None

    if player == BoardPiece(1):  # Finding out which player is who.
        other_player = BoardPiece(2)
    elif player == BoardPiece(2):
        other_player = BoardPiece(1)

    idx1 = []
    start = -1

    for i in range(0, 7):  # Player plays
        cumul1 = 0  # Initialization of the cumulative variable.
        old_board = board.copy()

        # If the board is completely empty, the optimal opening is the central column.
        if not old_board.any():
            start = 10
            break

        game, board_val = assign_weight(old_board, i, player, BOARD_VALUES)
        break_y, cumul1 = eval_heu(cumul1, board_val, i, idx1, game, node_type=np.array([-1]))

        if break_y and cumul1 > 10000:  # Already a winning position, break the search.
            tree.child[i].value = cumul1
            break
        elif break_y and cumul1 < 10000:  # Full column, do not go down its branches.
            tree.child[i].value = cumul1
            continue

        idx2 = []

        for j in range(0, 7):  # other player plays
            old_board1 = old_board.copy()

            game, board_val = assign_weight(old_board1, j, other_player, BOARD_VALUES)
            break_y, cumul2 = eval_heu(cumul1, board_val, j, idx2, game)

            if break_y:  # Either a full-column (worst value given) or a win (best one given).
                tree.child[i].child[j].value = cumul2
                continue

            idx3 = []
            for k in range(0, 7):  # player plays
                old_board2 = old_board1.copy()

                game, board_val = assign_weight(old_board2, k, player, BOARD_VALUES)
                break_y, cumul3 = eval_heu(cumul2, board_val, k, idx3, game, np.array([-1]))

                if break_y:  # Either a full-column (worst value given) or a win (best one given).
                    tree.child[i].child[j].child[k].value = cumul3
                    continue
                idx4 = []

                for v in range(0, 7):  # other player plays
                    old_board3 = old_board2.copy()

                    game, board_val = assign_weight(old_board3, v, other_player, BOARD_VALUES)
                    break_y, cumul4 = eval_heu(cumul3, board_val, v, idx4, game)

                    # Last layers' nodes assigned the top-down cumulative heuristic value.
                    tree.child[i].child[j].child[k].child[v].value = cumul4

                _, val_4 = min_child(tree.child[i].child[j].child[k], idx4)
                tree.child[i].child[j].child[k].value = val_4  # Assign the minimal child's value to its parent.
            _, val_3 = max_child(tree.child[i].child[j], idx3)
            tree.child[i].child[j].value = val_3  # Assign the maximal child's value to its parent.
        _, val_2 = min_child(tree.child[i], idx2)
        tree.child[i].value = val_2  # Assign the minimal child's value to its parent.
    action, tree.value = max_child(tree, idx1)

    action = PlayerAction(action)  # Wrap the chosen column as a PlayerAction

    if start == 10:  # If it is the first move, the optimal action is the central column 3.
        action = PlayerAction(3)

    saved_state_out = tree

    return action, saved_state_out