Ejemplo n.º 1
0
    def get_v(self, board: Board) -> np.ndarray:
        """
        Look up, or lazily create, the array of move values for the current state of `board`.

        The value dictionary `self.v` is keyed by the *hashed* board state and is only filled the first time
        a state is asked for. A new entry has 9 values, all starting at `self.v_init`. For every legal
        position, the move is tried on a copy of the board: a winning move gets `self.v_win`, and a
        non-winning move that leaves no empty square (a draw) gets `self.v_draw`.
        :param board: The current board state
        :return: Array with one value per board position; the same array object that is stored in `self.v`
        """
        key = board.hash_value()

        # Known state: hand back the stored array as is
        if key in self.v:
            return self.v[key]

        values = np.full(9, self.v_init)  # default initial value for every position
        # The player cannot end up in a losing state right after its own move,
        # so only wins and draws need to be marked here.
        for square in range(values.size):
            if not board.is_legal(square):
                continue
            successor = Board(board.state)
            successor.move(square, self.side)
            if successor.check_win():
                values[square] = self.v_win
            elif successor.num_empty() == 0:
                # not a win and nothing left to play: a draw
                values[square] = self.v_draw

        # Remember the freshly built entry for later lookups
        self.v[key] = values
        return values
Ejemplo n.º 2
0
 def move(self, board: Board):
     """
     Play one move for this player on `board`.

     The move is picked by `self.get_move`. The pair (hash of the board before the move, move) is appended
     to `self.move_history`, and only then is the move applied to the board for `self.side`.
     :param board: The board to make a move on
     :return: The GameResult after this move, Flag to indicate whether the move finished the game
     """
     chosen = self.get_move(board)
     # The hash has to be taken while the board is still in its pre-move state
     state_key = board.hash_value()
     self.move_history.append((state_key, chosen))
     _, result, game_over = board.move(chosen, self.side)
     return result, game_over
Ejemplo n.º 3
0
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.

        Every empty square is tried for the opponent of `self.side` and the resulting position is scored
        with `self._max`. The search stops early as soon as a continuation scores `self.LOSS_VALUE`.
        The result of an explored position is stored in `self.cache` under the board hash; finished games
        and boards without an empty square are returned without being cached.

        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation). Best Move is -1 if the game has already
        finished or there is no empty square left
        """
        #
        # First we check if we have seen this board position before, and if yes just return the cached value
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return self.cache[board_hash]

        #
        # If the game has already finished there is no continuation to look at
        #
        winner = board.who_won()
        if winner == self.side:
            return self.WIN_VALUE, -1
        opponent = board.other_side(self.side)
        if winner == opponent:
            return self.LOSS_VALUE, -1

        #
        # Min value starts as DRAW as this value will pass through when no empty square is left
        #
        min_value = self.DRAW_VALUE
        action = -1

        # Snapshot of the empty squares, taken before any continuation is explored
        empty_squares = [i for i, cell in enumerate(board.state) if cell == EMPTY]
        for index in empty_squares:
            b = Board(board.state)
            b.move(index, opponent)

            res, _ = self._max(b)
            # `action == -1` makes sure the first explored square is always taken as the starting candidate
            if res < min_value or action == -1:
                min_value = res
                action = index

                # Shortcut: Can't get better than that, so stop looking at further squares
                if min_value == self.LOSS_VALUE:
                    break

        # Store the result once, after the search is done, so that an interrupted search never leaves a
        # partial minimum in the cache. `action` is still -1 only if no square was explored at all.
        if action != -1:
            self.cache[board_hash] = (min_value, action)
        return min_value, action
Ejemplo n.º 4
0
    def _min(self, board: Board) -> tuple:
        """
        Evaluate the board position `board` from the Minimizing player's point of view.

        All continuations that share the lowest score are collected, stored together in `self.cache` under
        the board hash, and one of them is picked at random on every call.
        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation), chosen at random among all equally
        good moves. Best Move is -1 if the game has already finished or there is no empty square left
        """

        #
        # First we check if we have seen this board position before, and if yes just return a random choice
        # from the cached values
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return random.choice(self.cache[board_hash])

        #
        # If the game has already finished there is nothing to explore. Otherwise we look at possible
        # continuations
        #
        winner = board.who_won()
        if winner == self.side:
            best_moves = {(self.WIN_VALUE, -1)}
        else:
            opponent = board.other_side(self.side)
            if winner == opponent:
                best_moves = {(self.LOSS_VALUE, -1)}
            else:
                #
                # Min value starts as DRAW as this value will pass through when no empty square is left
                #
                min_value = self.DRAW_VALUE
                action = -1
                best_moves = {(min_value, action)}

                # Snapshot of the empty squares, taken before any continuation is explored
                empty_squares = [i for i, cell in enumerate(board.state) if cell == EMPTY]
                for index in empty_squares:
                    b = Board(board.state)
                    b.move(index, opponent)

                    res, _ = self._max(b)
                    if res < min_value or action == -1:
                        # Strictly better (or the very first) continuation: restart the collection
                        min_value = res
                        action = index
                        best_moves = {(min_value, action)}
                    elif res == min_value:
                        # Equally good continuation: keep it as an alternative
                        action = index
                        best_moves.add((min_value, action))

        # random.choice needs a sequence, so the set is frozen into a tuple before it is cached
        best_moves = tuple(best_moves)
        self.cache[board_hash] = best_moves

        return random.choice(best_moves)
Ejemplo n.º 5
0
 def get_move(self, board: Board) -> int:
     """
     Return the next move given the board `board` based on the current Q values.

     The position with the highest Q value is tried first. If it is not legal on `board`, its entry in
     the array returned by `self.get_q` is overwritten with -1.0 and the next highest one is tried.
     NOTE(review): the overwrite happens in place, so if `get_q` hands out its stored array (not visible
     here) the -1.0 entries persist for later calls.
     :param board: The current board state
     :return: The next move based on the current Q values for the input state
     :raises ValueError: If no legal move is found after one attempt per Q value
     """
     board_hash = board.hash_value()
     qvals = self.get_q(board_hash)

     # Every failed attempt rules out one position, so one attempt per entry is enough. If none of them
     # produced a legal move, further tries could only repeat themselves forever.
     for _ in range(len(qvals)):
         m = int(np.argmax(qvals))
         if board.is_legal(m):
             return m
         qvals[m] = -1.0

     raise ValueError("No legal move available for the given board")
Ejemplo n.º 6
0
    def move(self, board: Board):
        """
        Makes a move and returns the game result after this move and whether the move ended the game.

        Before the move is chosen, `self.move_strategy` is set to exploration with probability
        `self.epsilon` and to exploitation otherwise. The pair (hash of the board before the move, move) is
        appended to `self.move_history`, then `self.backup_value()` is called, and only after that is the
        move applied to the board.
        :param board: The board to make a move on
        :return: The GameResult after this move, Flag to indicate whether the move finished the game
        """
        # Select strategy to choose next move: exploit known or explore unknown?
        # The sample is drawn from [0, 1), so a strict `<` gives exactly probability epsilon
        # (never for epsilon == 0, always for epsilon == 1).
        explore = np.random.uniform(0, 1) < self.epsilon
        self.move_strategy = MoveStrategy.EXPLORATION if explore else MoveStrategy.EXPLOITATION

        m = self.get_move(board)
        # Record the state as it is before the move changes the board
        self.move_history.append((board.hash_value(), m))
        self.backup_value()
        _, res, finished = board.move(m, self.side)
        return res, finished