Beispiel #1
0
    def move(self, board):
        """
        Chooses a cell: an immediate win if one exists, otherwise a block of
        the opponent's immediate win, otherwise a random empty cell.

        Args:
            board (numpy.ndarray): the game board; it is copied, never
                modified

        Returns:
            tuple: the index of the chosen cell
        """
        open_cells = rules.empty_cells(board)
        candidates = [tuple(entry) for entry in open_cells]

        # First pass: take any cell that rules.winning_move accepts once this
        # agent's side is placed on it
        for candidate in candidates:
            trial = board.copy()
            trial[candidate] = self.side
            if rules.winning_move(trial):
                return candidate

        # Second pass: place the opposing side on each cell instead and
        # occupy the first cell that would let the opponent win
        for candidate in candidates:
            trial = board.copy()
            trial[candidate] = -self.side
            if not rules.winning_move(trial):
                continue
            if self.logger:
                self.logger.debug("Blocked {0}".format(candidate))
            return candidate

        # Nothing to win and nothing to block, so pick a random empty cell
        pick = random.randint(0, len(open_cells) - 1)
        return tuple(open_cells[pick])
Beispiel #2
0
    def test_empty_cells(self):
        """Checks rules.empty_cells on boards with zero to three 0 entries."""
        # A board without any 0 entry: the result is compared as a plain list
        full_board = np.asarray([[1, -1, -1], [-1, 1, 1], [1, -1, 1]])
        self.assertEqual(list(rules.empty_cells(full_board)), [])

        # Remaining cases as (board rows, expected cell indices) pairs
        cases = (
            ([[1, 1, 1], [1, 1, 1], [0, 1, 1]],
             [[2, 0]]),
            ([[1, 0, 1], [1, 1, 1], [0, 1, 1]],
             [[0, 1], [2, 0]]),
            ([[1, 1, 1], [0, 0, 0], [-1, 1, 1]],
             [[1, 0], [1, 1], [1, 2]]),
        )
        for rows, expected in cases:
            found = rules.empty_cells(np.asarray(rows))
            np.testing.assert_array_equal(found, expected)
Beispiel #3
0
    def mcts(self, board):
        """
        Runs random playouts from the given board and returns the move that
        the root node of the resulting tree reports as best.

        Playouts are repeated until ``self.time_budget`` seconds have elapsed
        or ``self.max_playouts`` playouts have been completed. Each playout
        makes random moves from the root until ``rules.winner`` reports a
        winner or ``rules.board_full`` is true, adding a ``TreeNode`` for
        every move not yet in the tree. The result (1.0 if ``self.side`` won,
        0.0 if the other side won, 0.5 otherwise) is then added to ``wins``
        and ``visits`` is incremented on every node of the path except the
        root.

        The tree is kept in ``self.root_node`` after the search.

        Args:
            board (numpy.ndarray): the current game board, used as the state
                of the root node

        Returns:
            whatever ``root_node.best_move()`` returns
        """
        max_time = time.time() + self.time_budget
        root_node = TreeNode(board)
        playout_count = 0

        while time.time() < max_time and playout_count < self.max_playouts:
            # Start at tree root (current actual state)
            current_node = root_node
            current_player = self.side

            while True:
                # Check for terminal state
                winner = rules.winner(current_node.state)
                if winner or rules.board_full(current_node.state):
                    break

                # Pick a random move
                empty_cells = rules.empty_cells(current_node.state)
                move = tuple(random.choice(empty_cells))

                # Add the move to the tree if it is not present yet
                if move not in current_node.child_nodes:
                    new_board = current_node.state.copy()
                    new_board[move] = current_player
                    current_node.child_nodes[move] = TreeNode(
                        new_board, current_node)
                current_node = current_node.child_nodes[move]

                # Swap players
                current_player = -current_player

            # Terminal state reached so backpropagate result
            if winner == self.side:
                result = 1.0
            elif winner == -self.side:
                result = 0.0
            else:
                result = 0.5
            while current_node is not root_node:
                current_node.visits += 1
                current_node.wins += result
                current_node = current_node.parent

            playout_count += 1

        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3)
        print("Number of MCTS playouts: {0}".format(playout_count))

        # Store the root explicitly. After backpropagation current_node is
        # always the root, but it is unbound when no playout was run.
        self.root_node = root_node

        # Return move with highest score
        return root_node.best_move()
Beispiel #4
0
def valid_move(board, move):
    """
    Tells whether a move targets one of the board's empty cells.

    Args:
        board (numpy.ndarray): two dimensional array representing the game board
        move ((int, int)): tuple with the coordinates of the move

    Returns:
        bool: True if the move is listed by rules.empty_cells(board), False
            otherwise
    """
    candidate = list(move)
    open_cells = rules.empty_cells(board).tolist()
    return candidate in open_cells
Beispiel #5
0
    def move(self, board):
        """
        Chooses an immediately winning cell if one exists, otherwise a random
        empty cell.

        Args:
            board (numpy.ndarray): the game board; it is copied, never
                modified

        Returns:
            tuple: the index of the chosen cell
        """
        open_cells = rules.empty_cells(board)

        # Place this agent's side on each empty cell of a scratch copy and
        # take the first one that rules.winning_move accepts
        for entry in open_cells:
            candidate = tuple(entry)
            trial = board.copy()
            trial[candidate] = self.side
            if rules.winning_move(trial):
                return candidate

        # No immediate win available, so pick a random empty cell
        fallback = random.randint(0, len(open_cells) - 1)
        return tuple(open_cells[fallback])
Beispiel #6
0
    def move(self, board):
        """
        Chooses an immediately winning cell if one exists, otherwise a random
        empty cell.

        Args:
            board (numpy.ndarray): the game board; it is copied, never
                modified

        Returns:
            tuple: the index of the chosen cell
        """
        open_cells = rules.empty_cells(board)

        # Place this agent's side on each empty cell of a scratch copy and
        # take the first one that rules.winning_move accepts for that cell
        for entry in open_cells:
            candidate = tuple(entry)
            trial = board.copy()
            trial[candidate] = self.side
            if rules.winning_move(trial, candidate):
                return candidate

        # No immediate win available, so pick a random empty cell
        fallback = random.randint(0, len(open_cells) - 1)
        return tuple(open_cells[fallback])
Beispiel #7
0
def valid_move(board, move):
    """
    Tells whether a move targets one of the board's empty cells.

    Args:
        board (numpy.ndarray): two dimensional array representing the game board
        move ((int, int)): tuple with the coordinates of the move

    Returns:
        bool: True if the move is listed by rules.empty_cells(board), False
            otherwise
    """
    open_cells = rules.empty_cells(board).tolist()
    wanted = list(move)
    is_open = wanted in open_cells
    return is_open
Beispiel #8
0
    def move_values(self, board):
        """
        Returns a list of the possible moves for the given board with the value
        of each move.

        Args:
            board (numpy.ndarray): two dimensional array representing the board

        Returns:
            numpy.ndarray: object array of [cell, value] rows, one row per
                cell returned by rules.empty_cells(board), with the value
                taken from self.move_value(cell, board)
        """
        values = [[cell, self.move_value(cell, board)]
                  for cell in rules.empty_cells(board)]
        # A row pairs a cell (a coordinate sequence) with its value, so the
        # nested list is ragged when the value is a scalar. NumPy >= 1.24
        # raises ValueError for ragged input unless dtype=object is given;
        # older releases built the same object array implicitly.
        return np.asarray(values, dtype=object)
Beispiel #9
0
    def move_values(self, board):
        """
        Returns a list of the possible moves for the given board with the value
        of each move.

        Args:
            board (numpy.ndarray): two dimensional array representing the board

        Returns:
            numpy.ndarray: object array of [cell, value] rows, one row per
                cell returned by rules.empty_cells(board), with the value
                taken from self.move_value(cell, board)
        """
        values = [[cell, self.move_value(cell, board)]
                  for cell in rules.empty_cells(board)]
        # A row pairs a cell (a coordinate sequence) with its value, so the
        # nested list is ragged when the value is a scalar. NumPy >= 1.24
        # raises ValueError for ragged input unless dtype=object is given;
        # older releases built the same object array implicitly.
        return np.asarray(values, dtype=object)
Beispiel #10
0
    def __init__(self, board, side, parent=None):
        """
        Sets up a tree node with empty statistics and no children.

        Args:
            board (numpy.ndarray): two dimensional array representing the game
                board; stored as the node state
            side (int): the player side, defined in the game rules
            parent: stored as-is in ``self.parent``; None by default
                (described by the original documentation as the id of the
                parent node)
        """
        # Unique number to identify the node
        self.id = UCTTreeNode.new_id()

        # Game position held by this node and its place in the tree
        self.state = board
        self.side = side
        self.parent = parent

        # Search statistics, all empty for a fresh node
        self.visits = 0
        self.wins = 0
        self.ucb1_score = None

        # Expansion bookkeeping: every empty cell of the state starts out as
        # an untried move and no child exists yet
        self.untried_moves = rules.empty_cells(self.state).tolist()
        self.child_nodes = {}
Beispiel #11
0
    def move(self, board):
        """
        Chooses a cell by value, either exploiting the best known move or
        exploring another one, and records the resulting board.

        With probability ``self.BIAS`` the agent explores, otherwise it
        exploits; the chosen mode is stored in ``self.state``. A copy of the
        board with the move applied is appended to ``self.move_states``.

        Args:
            board (numpy.ndarray): the game board; it is copied, never
                modified

        Returns:
            tuple: the index of the chosen cell

        Raises:
            ValueError: if ``self.state`` is neither ``self.EXPLOITING`` nor
                ``self.EXPLORING``
        """
        # Look up the value of every possible move: rows of [cell, value]
        possible_moves = [[cell, self.move_value(cell, board)]
                          for cell in rules.empty_cells(board)]

        # Sort moves by value (last element has highest value). The rows are
        # ragged (a cell sequence next to its value), which NumPy >= 1.24
        # rejects with ValueError unless dtype=object is requested; older
        # releases built the same object array implicitly.
        possible_moves = np.asarray(possible_moves, dtype=object)
        possible_moves = possible_moves[possible_moves[:, 1].argsort()]

        # Choose move behaviour based on the bias probability
        # TODO: adjust bias down over time
        if random.random() < self.BIAS:
            self.state = self.EXPLORING
        else:
            self.state = self.EXPLOITING

        # Choose either highest value (exploit) or a random other cell (explore)
        if self.state == self.EXPLOITING or len(possible_moves) == 1:
            # Find the highest value and get all free cells with this value,
            # then choose one at random
            best_value = possible_moves[-1][1]
            best_cells = [x[0] for x in possible_moves if x[1] == best_value]
            i = random.randint(0, len(best_cells) - 1)
            cell = tuple(best_cells[i])
        elif self.state == self.EXPLORING:
            # Choose a random entry other than the last sorted one.
            # NOTE: a cell tied for the highest value can still be picked.
            # TODO: weight other moves according to value?
            i = random.randint(0, len(possible_moves) - 2)
            cell = tuple(possible_moves[i][0])
        else:
            raise ValueError("State is unexpected value: {0}".format(
                self.state))

        # Record move state for later
        move_state = board.copy()
        move_state[cell] = self.side
        self.move_states.append(move_state)

        return cell
Beispiel #12
0
    def move(self, board):
        """
        Chooses a cell by value, either exploiting the best known move or
        exploring another one, and records the resulting board.

        With probability ``self.BIAS`` the agent explores, otherwise it
        exploits; the chosen mode is stored in ``self.state``. A copy of the
        board with the move applied is appended to ``self.move_states``.

        Args:
            board (numpy.ndarray): the game board; it is copied, never
                modified

        Returns:
            tuple: the index of the chosen cell

        Raises:
            ValueError: if ``self.state`` is neither ``self.EXPLOITING`` nor
                ``self.EXPLORING``
        """
        # Look up the value of every possible move: rows of [cell, value]
        possible_moves = [[cell, self.move_value(cell, board)]
                          for cell in rules.empty_cells(board)]

        # Sort moves by value (last element has highest value). The rows are
        # ragged (a cell sequence next to its value), which NumPy >= 1.24
        # rejects with ValueError unless dtype=object is requested; older
        # releases built the same object array implicitly.
        possible_moves = np.asarray(possible_moves, dtype=object)
        possible_moves = possible_moves[possible_moves[:, 1].argsort()]

        # Choose move behaviour based on the bias probability
        # TODO: adjust bias down over time
        if random.random() < self.BIAS:
            self.state = self.EXPLORING
        else:
            self.state = self.EXPLOITING

        # Choose either highest value (exploit) or a random other cell (explore)
        if self.state == self.EXPLOITING or len(possible_moves) == 1:
            # Find the highest value and get all free cells with this value,
            # then choose one at random
            best_value = possible_moves[-1][1]
            best_cells = [x[0] for x in possible_moves if x[1] == best_value]
            i = random.randint(0, len(best_cells) - 1)
            cell = tuple(best_cells[i])
        elif self.state == self.EXPLORING:
            # Choose a random entry other than the last sorted one.
            # NOTE: a cell tied for the highest value can still be picked.
            # TODO: weight other moves according to value?
            i = random.randint(0, len(possible_moves) - 2)
            cell = tuple(possible_moves[i][0])
        else:
            raise ValueError("State is unexpected value: {0}".format(
                self.state))

        # Record move state for later
        move_state = board.copy()
        move_state[cell] = self.side
        self.move_states.append(move_state)

        return cell
Beispiel #13
0
    def move(self, board):
        """
        Chooses a cell: an immediate win if one exists, otherwise a block of
        the opponent's immediate win, otherwise a random empty cell.

        Args:
            board (numpy.ndarray): the game board; it is copied, never
                modified

        Returns:
            tuple: the index of the chosen cell
        """
        open_cells = rules.empty_cells(board)

        # Two passes over the empty cells: first look for this agent's own
        # winning cell, then for a cell the opponent would win on (a block)
        passes = ((self.side, False), (-self.side, True))
        for side, is_block in passes:
            for entry in open_cells:
                candidate = tuple(entry)
                trial = board.copy()
                trial[candidate] = side
                if not rules.winning_move(trial, candidate):
                    continue
                if is_block and self.logger:
                    self.logger.debug("Blocked {0}".format(candidate))
                return candidate

        # Nothing to win and nothing to block, so pick a random empty cell
        pick = random.randint(0, len(open_cells) - 1)
        return tuple(open_cells[pick])
Beispiel #14
0
    def minimax(self, board, player):
        """
        Recursive method that returns the value of the given position and,
        when it is this agent's turn, the optimal next moves.

        Every empty cell is tried in turn: the move is made on the board in
        place, evaluated recursively for the other player and then undone, so
        the board is unchanged once the call returns.

        Args:
            board (numpy.ndarray): two dimensional array representing the
                board state
            player (int): the side of the player to move

        Returns:
            result (int): 1 if ``self.side`` wins, -1 if the other side wins,
                0 for a draw
            optimal_moves: the rows of ``rules.empty_cells(board)`` that
                achieve the best result when ``player`` equals ``self.side``;
                None for terminal positions and when it is the opponent's
                turn
        """
        empty_cells = rules.empty_cells(board)

        # Check if the previous move resulted in a win or draw (base case)
        winner = rules.winner(board)
        if winner is not None:
            if winner == self.side:
                # Player won so return score for a win
                return 1, None
            # Opponent won so return score for a loss
            return -1, None
        if rules.board_full(board):
            # Board is full so return score for a draw
            return 0, None

        # Test each child move recursively and add results to the list
        results_list = []
        for cell in empty_cells:
            # Make the move
            cell = tuple(cell)
            board[cell] = player

            # Get the value of this child move and add it to the results
            result, _ = self.minimax(board, -player)
            results_list.append(result)

            # Reverse the move
            board[cell] = rules.EMPTY

        # Compare sides by value: an identity test ("is") on integers only
        # works by accident of small-int caching and fails for other integer
        # objects holding the same value
        if player == self.side:
            # Return best moves for player from list of child moves
            max_score = max(results_list)
            max_inds = [
                i for i, x in enumerate(results_list) if x == max_score
            ]
            return max_score, empty_cells[max_inds]

        # Opponent's turn: only the worst outcome for this agent is needed,
        # not the move that leads to it
        return min(results_list), None
Beispiel #15
0
 def move(self, board):
     """Returns one of the board's empty cells, picked at random, as a tuple."""
     candidates = rules.empty_cells(board)
     last_index = len(candidates) - 1
     chosen = random.randint(0, last_index)
     return tuple(candidates[chosen])
Beispiel #16
0
 def move(self, board):
     """Returns the first entry of rules.empty_cells(board) as a tuple."""
     first_cell = rules.empty_cells(board)[0]
     return tuple(first_cell)
Beispiel #17
0
 def move(self, board):
     """Returns the first entry of rules.empty_cells(board) as a tuple."""
     candidates = rules.empty_cells(board)
     first_cell = candidates[0]
     return tuple(first_cell)
Beispiel #18
0
 def move(self, board):
     """Returns one of the board's empty cells, picked at random, as a tuple."""
     candidates = rules.empty_cells(board)
     position = random.randint(0, len(candidates) - 1)
     picked = candidates[position]
     return tuple(picked)