Example #1
    def get_v(self, board: Board) -> np.ndarray:
        """
        Returns the values of all possible next board states when moving from the current state of `board`
        :param board: The current board state
        :return: List of values of all possible next board states
        """
        # We build the value dictionary in a lazy manner, only adding a state when it is actually used for the first time
        #
        board_hash = board.hash_value()  # needed because the value dictionary maps *hashed* states to values
        if board_hash in self.v:
            vals = self.v[board_hash]
        else:
            vals = np.full(9, self.v_init)  # default initial value
            # set values for winning states to WIN_VALUE
            # (player cannot end up in a losing state after a move
            # so losing states need not be considered):
            for pos in range(vals.size):  # vals.size = BOARD_SIZE
                if board.is_legal(pos):
                    b = Board(board.state)
                    b.move(pos, self.side)
                    if b.check_win():
                        vals[pos] = self.v_win
                    elif b.num_empty() == 0:
                        # if the move does not win and fills the last empty
                        # position, then the game ends in a draw
                        vals[pos] = self.v_draw
            # Update dictionary:
            self.v[board_hash] = vals
        return vals
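
For context, a minimal sketch of how such a value list is typically consumed during play: mask out the illegal positions, then pick the highest-valued legal move. The helper name pick_greedy_move is an assumption for illustration; the Board methods and the numpy alias np match the example above.

    def pick_greedy_move(self, board: Board) -> int:
        # Hypothetical helper: fetch (or lazily create) the value estimates
        # for this state and play the best legal move
        vals = self.get_v(board)
        legal = [board.is_legal(pos) for pos in range(vals.size)]
        masked = np.where(legal, vals, -np.inf)
        return int(np.argmax(masked))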
Example #2
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.

        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation). Returns -1 as the best move if the game has
        already finished
        """

        #
        # First we check if we have seen this board position before, and if yes just return the cached value
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return self.cache[board_hash]

        #
        # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
        # of a draw
        #
        min_value = self.DRAW_VALUE
        action = -1

        #
        # If the game has already finished we return. Otherwise we look at possible continuations
        #
        winner = board.who_won()
        if winner == self.side:
            min_value = self.WIN_VALUE
            action = -1
        elif winner == board.other_side(self.side):
            min_value = self.LOSS_VALUE
            action = -1
        else:
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, board.other_side(self.side))

                res, _ = self._max(b)
                if res < min_value or action == -1:
                    min_value = res
                    action = index

                    # Shortcut: Can't get better than that, so abort here and return this move
                    if min_value == self.LOSS_VALUE:
                        self.cache[board_hash] = (min_value, action)
                        return min_value, action

        # Cache the final result once, covering the terminal win/loss cases as well
        self.cache[board_hash] = (min_value, action)
        return min_value, action
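
The _min above delegates to self._max for the opponent's replies, which this example does not show. Below is a sketch of the mirror image, assuming the same Board API, cache, and WIN_VALUE/LOSS_VALUE/DRAW_VALUE constants; it is an illustration, not the example's actual code.

    def _max(self, board: Board) -> (float, int):
        # Mirror image of _min: evaluate from the Maximizing player's point of view
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return self.cache[board_hash]

        max_value = self.DRAW_VALUE
        action = -1

        winner = board.who_won()
        if winner == self.side:
            max_value = self.WIN_VALUE
        elif winner == board.other_side(self.side):
            max_value = self.LOSS_VALUE
        else:
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, self.side)  # the maximizing player moves for our side

                res, _ = self._min(b)
                if res > max_value or action == -1:
                    max_value = res
                    action = index

                    # Shortcut: a win is the best possible result, so stop searching
                    if max_value == self.WIN_VALUE:
                        self.cache[board_hash] = (max_value, action)
                        return max_value, action

        self.cache[board_hash] = (max_value, action)
        return max_value, action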
Example #3
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.
        :param board: The board position to evaluate
        :return: Tuple of (best result, best move), chosen at random among equally good moves. Returns -1 for the
        best move if the game has already finished
        """

        #
        # First we check if we have seen this board position before, and if yes just return a random choice
        # from the cached values
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return random.choice(self.cache[board_hash])

        #
        # If the game has already finished we return. Otherwise we look at possible continuations
        #
        winner = board.who_won()
        if winner == self.side:
            best_moves = {(self.WIN_VALUE, -1)}
        elif winner == board.other_side(self.side):
            best_moves = {(self.LOSS_VALUE, -1)}
        else:
            #
            # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
            # of a draw
            #
            min_value = self.DRAW_VALUE
            action = -1
            best_moves = {(min_value, action)}
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, board.other_side(self.side))

                res, _ = self._max(b)
                if res < min_value or action == -1:
                    min_value = res
                    action = index
                    best_moves = {(min_value, action)}
                elif res == min_value:
                    action = index
                    best_moves.add((min_value, action))

        best_moves = tuple(best_moves)
        self.cache[board_hash] = best_moves

        return random.choice(best_moves)
Example #4
    def move(self, board: Board) -> (GameResult, bool):
        """
        Implements the Player interface and makes a move on Board `board`
        :param board: The Board to make a move on
        :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
        """

        # We record all game positions to feed them into the NN for training with the corresponding updated Q
        # values.
        self.board_position_log.append(board.state.copy())

        nn_input = self.board_state_to_nn_input(board.state)

        probs, _ = self.get_valid_probs([nn_input], self.q_net, [board])
        probs = probs[0]

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration
        if (self.training is True) and \
                ((self.game_counter < self.pre_training_games) or (np.random.rand(1) < self.random_move_prob)):
            move = board.random_empty_spot()
        else:
            move = np.argmax(probs)

        # We record the action we selected as well as the Q values of the current state for later use when
        # adjusting NN weights.
        self.action_log.append(move)

        # We execute the move and return the result
        _, res, finished = board.move(move, self.side)
        return res, finished
Example #5
    def move(self, board: Board) -> (GameResult, bool):
        """
        Makes a move on the given input state
        :param board: The current state of the game
        :return: The GameResult after this move, Flag to indicate whether the move finished the game
        """
        self.board_position_log.append(board.state.copy())
        nn_input = self.board_state_to_nn_input(board.state)

        probs = self.get_valid_probs([nn_input], [board])
        probs = probs[0]

        # We sample our next move from the probability distribution the network produces over the legal moves.
        # During the pre-training phase, however, we choose a random move to encourage exploration
        if (self.training is True) and \
                (self.game_counter < self.pre_training_games):
            move = board.random_empty_spot()
        else:
            if np.isnan(probs).any():
                # Can happen when all probabilities degenerate to 0.
                # The best thing we can do is make a random legal move
                move = board.random_empty_spot()
            else:
                move = np.random.choice(np.arange(len(probs)), p=probs)
            if not board.is_legal(move):  # Debug case only, I hope
                print("Illegal move!")

        # We record the action we selected for later use when adjusting NN weights.
        self.action_log.append(move)

        _, res, finished = board.move(move, self.side)

        return res, finished
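
The NaN guard above exists because np.random.choice requires a probability vector that sums to 1. Below is a hedged sketch of what a get_valid_probs along these lines might do (the network call self.nn.predict is a placeholder, not this example's actual API): zero out the illegal moves and renormalize, which divides by zero, and thus produces NaN, when every legal move has probability 0.

    def get_valid_probs(self, nn_inputs, boards):
        # Hypothetical sketch: run the policy network, mask out illegal
        # moves, then renormalize each row so it sums to 1. A row whose
        # legal probabilities are all 0 turns into NaN here, which the
        # caller handles by falling back to a random legal move
        raw = np.array(self.nn.predict(nn_inputs))  # placeholder network call
        masks = np.array([[1.0 if board.is_legal(i) else 0.0
                           for i in range(len(board.state))]
                          for board in boards])
        masked = raw * masks
        return masked / masked.sum(axis=1, keepdims=True)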
Example #6
    def move(self, board: Board) -> (GameResult, bool):
        """
        Making a random move
        :param board: The board to make a move on
        :return: The result of the move
        """
        _, res, finished = board.move(board.random_empty_spot(), self.side)
        return res, finished
Example #7
    def move(self, board: Board) -> (GameResult, bool):
        """
        Making a move according to the MinMax algorithm
        :param board: The board to make a move on
        :return: The result of the move
        """
        score, action = self._max(board)
        _, res, finished = board.move(action, self.side)
        return res, finished
Example #8
    def move(self, board: Board) -> (GameResult, bool):
        """
        Makes a move and returns the game result after this move and whether the move ended the game
        :param board: The board to make a move on
        :return: The GameResult after this move, Flag to indicate whether the move finished the game
        """
        m = self.get_move(board)
        self.move_history.append((board.hash_value(), m))
        _, res, finished = board.move(m, self.side)
        return res, finished
Example #9
    def move(self, board: Board) -> (GameResult, bool):
        """
        Making a move according to the MinMax algorithm. If more than one best move exists, chooses amongst them
        randomly.
        :param board: The board to make a move on
        :return: The result of the move
        """
        score, action = self._max(board)
        _, res, finished = board.move(action, self.side)
        return res, finished
Example #10
def play_random_game():
    board = Board()
    finished = False
    last_play = NAUGHT
    next_play = CROSS
    while not finished:
        _, result, finished = board.move(board.random_empty_spot(), next_play)
        print_board(board)
        last_play, next_play = next_play, last_play
    if result == GameResult.DRAW:
        print("Game is a draw")
    elif last_play == CROSS:
        print("Cross won!")
    else:
        print("Naught won!")
Example #11
    def move(self, board: Board) -> (GameResult, bool):
        """
        Make move corresponding to key pressed by user
        :param board: The board to make a move on
        :return: The result of the move
        """
        print()
        while True:
            key = input("Your move? ")
            if key in self.keys:
                break
        position = self.keys.index(key)

        _, res, finished = board.move(position, self.side)
        return res, finished
Example #12
    def move(self, board: Board) -> (GameResult, bool):
        """
        Implements the Player interface and makes a move on Board `board`
        :param board: The Board to make a move on
        :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
        """

        # We record all game positions to feed them into the NN for training with the corresponding updated Q
        # values.
        self.board_position_log.append(board.state.copy())

        nn_input = self.board_state_to_nn_input(board.state)
        probs, qvalues = self.get_probs(nn_input)
        qvalues = np.copy(qvalues)

        # We filter out all illegal moves by setting their probability to -1, so that argmax can never
        # pick them. We don't change the q values themselves, as we don't want the NN to waste any effort
        # on learning different Q values for moves that are illegal anyway.
        for index in range(len(probs)):
            if not board.is_legal(index):
                probs[index] = -1
            elif probs[index] < 0:
                probs[index] = 0.0

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration
        if (self.training is True) and (np.random.rand(1) <
                                        self.random_move_prob):
            move = board.random_empty_spot()
        else:
            move = np.argmax(probs)

        # Unless this is the very first move, the max Q value of this state is also the max Q value of
        # the move that got the game from the previous state to this one.
        if len(self.action_log) > 0:
            self.next_max_log.append(qvalues[np.argmax(probs)])

        # We record the action we selected as well as the Q values of the current state for later use when
        # adjusting NN weights.
        self.action_log.append(move)
        self.values_log.append(qvalues)

        # We execute the move and return the result
        _, res, finished = board.move(move, self.side)
        return res, finished
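
The next_max_log recorded above feeds the usual Q-learning target Q(s, a) = r + gamma * max Q(s', a'). Below is a sketch of how the three logs might be combined once the game's reward is known; calculate_targets and self.reward_discount are illustrative names, not shown in this example.

    def calculate_targets(self, final_reward: float) -> list:
        # For each recorded position, start from the logged Q values and
        # update only the chosen action: the last move receives the game's
        # reward directly, earlier moves bootstrap from the discounted
        # maximum Q value of the following state
        targets = []
        for i, qvalues in enumerate(self.values_log):
            target = np.copy(qvalues)
            if i == len(self.values_log) - 1:
                target[self.action_log[i]] = final_reward
            else:
                target[self.action_log[i]] = self.reward_discount * self.next_max_log[i]
            targets.append(target)
        return targets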
Example #13
    def move(self, board: Board) -> (GameResult, bool):
        """
        Makes a move and returns the game result after this move and whether the move ended the game
        :param board: The board to make a move on
        :return: The GameResult after this move, Flag to indicate whether the move finished the game
        """
        # Select strategy to choose next move: exploit known or explore unknown?
        if np.random.uniform(0, 1) <= self.epsilon:
            self.move_strategy = MoveStrategy.EXPLORATION
        else:
            self.move_strategy = MoveStrategy.EXPLOITATION

        m = self.get_move(board)
        self.move_history.append((board.hash_value(), m))
        self.backup_value()
        _, res, finished = board.move(m, self.side)
        return res, finished
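
backup_value itself is not shown in this example. A minimal sketch of the tabular temporal-difference backup it suggests, assuming move_history holds (state_hash, move) pairs, a value table self.v like the one in Example #1, and a learning rate self.alpha (all three names are assumptions):

    def backup_value(self):
        # TD(0) backup: nudge the value of the previous (state, move) pair
        # towards the best value currently estimated for the newest state.
        # Assumes get_v has already been called for the newest state, so
        # its hash is present in self.v
        if len(self.move_history) < 2:
            return
        prev_hash, prev_move = self.move_history[-2]
        curr_hash, _ = self.move_history[-1]
        best_next = np.max(self.v[curr_hash])
        self.v[prev_hash][prev_move] += self.alpha * (
            best_next - self.v[prev_hash][prev_move])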