    def move(self, board: Board) -> (GameResult, bool):
        """
        Makes a move on the given input state
        :param board: The current state of the game
        :return: The GameResult after this move and a flag indicating whether the move finished the game
        """
        self.board_position_log.append(board.state.copy())
        nn_input = self.board_state_to_nn_input(board.state)

        probs = self.get_valid_probs([nn_input], [board])
        probs = probs[0]

        # Normally we sample our next move from the probability distribution over the remaining legal moves.
        # During the pre-training phase, however, we make a purely random move to encourage exploration.
        if (self.training is True) and \
                (self.game_counter < self.pre_training_games):
            move = board.random_empty_spot()
        else:
            # NaNs can appear when all probabilities degenerate to 0; the best thing we can
            # do then is make a random legal move.
            if np.isnan(probs).any():
                move = board.random_empty_spot()
            else:
                move = np.random.choice(np.arange(len(probs)), p=probs)
            if not board.is_legal(move):  # Debug case only, I hope
                print("Illegal move!")

        # We record the action we selected for later use when adjusting NN weights.
        self.action_log.append(move)

        _, res, finished = board.move(move, self.side)

        return res, finished
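The helper get_valid_probs used above is defined elsewhere in this project and is not shown in these excerpts. As a rough illustration only, a minimal sketch for a single board (rather than a batch) might mask out illegal moves and renormalize as below; the name get_valid_probs_sketch, its signature, and the NaN fallback are assumptions, not the project's actual API.

import numpy as np

def get_valid_probs_sketch(probs: np.ndarray, board) -> np.ndarray:
    # Zero out the probability of every illegal move.
    masked = np.array([p if board.is_legal(i) else 0.0 for i, p in enumerate(probs)])
    total = masked.sum()
    if total == 0:
        # Every legal move has probability 0; return NaNs so the caller can
        # fall back to a random legal move, as the code above does.
        return np.full_like(masked, np.nan)
    # Renormalize so the remaining legal probabilities sum to 1.
    return masked / total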
Example #2
    def move(self, board: Board) -> (GameResult, bool):
        """
        Implements the Player interface and makes a move on Board `board`
        :param board: The Board to make a move on
        :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
        """

        # We record all game positions to feed them into the NN for training with the corresponding updated Q
        # values.
        self.board_position_log.append(board.state.copy())

        nn_input = self.board_state_to_nn_input(board.state)

        probs, _ = self.get_valid_probs([nn_input], self.q_net, [board])
        probs = probs[0]

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration.
        if (self.training is True) and \
                ((self.game_counter < self.pre_training_games) or (np.random.rand(1) < self.random_move_prob)):
            move = board.random_empty_spot()
        else:
            move = np.argmax(probs)

        # We record the action we selected for later use when adjusting NN weights.
        self.action_log.append(move)

        # We execute the move and return the result
        _, res, finished = board.move(move, self.side)
        return res, finished
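The selection logic above is a plain epsilon-greedy policy: random moves during the pre-training phase or with probability random_move_prob, otherwise the argmax over the (already filtered) probabilities. Pulled out as a standalone function, a sketch under the same assumptions could look like this (the name select_move_sketch is illustrative):

import numpy as np

def select_move_sketch(probs: np.ndarray, board, training: bool,
                       game_counter: int, pre_training_games: int,
                       random_move_prob: float) -> int:
    # Explore: play a random legal move during pre-training, or with a small probability.
    if training and (game_counter < pre_training_games
                     or np.random.rand() < random_move_prob):
        return board.random_empty_spot()
    # Exploit: play the move with the highest probability.
    return int(np.argmax(probs))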
Example #3
 def get_move(self, board: Board) -> int:
     """
     Return the next move given the board `board` based on the current values of next states
     :param board: The current board state
     :return: The next move based on the current values of next states, starting from input state
     """
     if self.move_strategy == MoveStrategy.EXPLORATION:
         # exploratory random move
         m = board.random_empty_spot()
         _ = self.get_v(board)  # just to ensure we have values for our board state
         return m
     else:
         # greedy move: exploiting current knowledge
         vals = self.get_v(board)  # type: np.ndarray
         while True:
             maxv_idxs = np.argwhere(vals == np.amax(vals))  # positions of max values in array
             m = np.random.choice(maxv_idxs.flatten().tolist())  # type: int
             # m = np.argmax(vals)  # type: int  # this instead would return the 1st occurrence
             if board.is_legal(m):
                 # print("vals=", end='')
                 # print(vals)
                 # print("m={}".format(m))
                 return m
             else:
                 vals[m] = -1.0
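get_v is not part of these excerpts; the move selection above only assumes it returns one value per board position for the current state. A minimal tabular sketch is shown below, assuming a 3x3 board, a default value of 0.5 for unseen states, and a board.hash_value() helper; all three are illustrative assumptions rather than this project's actual implementation.

import numpy as np

BOARD_SIZE = 9  # assumed 3x3 board

class ValueTableSketch:
    def __init__(self, default_value: float = 0.5):
        # Maps a hashable board state to an array of per-position values.
        self.values = {}
        self.default_value = default_value

    def get_v(self, board) -> np.ndarray:
        key = board.hash_value()  # assumed helper returning a hashable state id
        if key not in self.values:
            self.values[key] = np.full(BOARD_SIZE, self.default_value)
        return self.values[key]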
Example #4
 def move(self, board: Board) -> (GameResult, bool):
     """
     Makes a random move on the given board
     :param board: The board to make a move on
     :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
     """
     _, res, finished = board.move(board.random_empty_spot(), self.side)
     return res, finished
Example #5
def play_random_game():
    board = Board()
    finished = False
    last_play = NAUGHT
    next_play = CROSS
    while not finished:
        _, result, finished = board.move(board.random_empty_spot(), next_play)
        print_board(board)
        last_play, next_play = next_play, last_play
    if result == GameResult.DRAW:
        print("Game is a draw")
    elif last_play == CROSS:
        print("Cross won!")
    else:
        print("Naught won!")
Example #6
    def move(self, board: Board) -> (GameResult, bool):
        """
        Implements the Player interface and makes a move on Board `board`
        :param board: The Board to make a move on
        :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
        """

        # We record all game positions to feed them into the NN for training with the corresponding updated Q
        # values.
        self.board_position_log.append(board.state.copy())

        nn_input = self.board_state_to_nn_input(board.state)
        probs, qvalues = self.get_probs(nn_input)
        qvalues = np.copy(qvalues)

        # We filter out all illegal moves by setting their probability to -1 (so the argmax below can
        # never pick them) and clip negative probabilities of legal moves to 0. We don't change the Q
        # values, as we don't want the NN to waste any effort on learning different Q values for moves
        # that are illegal anyway.
        for index in range(len(probs)):
            if not board.is_legal(index):
                probs[index] = -1
            elif probs[index] < 0:
                probs[index] = 0.0

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration.
        if (self.training is True) and (np.random.rand(1) < self.random_move_prob):
            move = board.random_empty_spot()
        else:
            move = np.argmax(probs)

        # Unless this is the very first move, the max Q value of this state also serves as the "next state"
        # max Q value for the move that brought the game from the previous state to this one.
        if len(self.action_log) > 0:
            self.next_max_log.append(qvalues[np.argmax(probs)])

        # We record the action we selected as well as the Q values of the current state for later use when
        # adjusting NN weights.
        self.action_log.append(move)
        self.values_log.append(qvalues)

        # We execute the move and return the result
        _, res, finished = board.move(move, self.side)
        return res, finished
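The logs collected above (board_position_log, action_log, values_log, next_max_log) are only consumed later, when the NN weights are adjusted; that code is not part of these excerpts. The sketch below shows how Q-learning targets could be built from them at the end of a game; the function name, the final_reward and discount parameters, and the exact update rule are illustrative assumptions.

import numpy as np

def calculate_targets_sketch(action_log, values_log, next_max_log,
                             final_reward: float, discount: float = 0.95):
    targets = []
    for i, action in enumerate(action_log):
        target = np.copy(values_log[i])
        if i == len(action_log) - 1:
            # The last move of the game is credited with the final reward.
            target[action] = final_reward
        else:
            # Earlier moves bootstrap from the max Q value of the following state,
            # which is exactly what next_max_log recorded during play.
            target[action] = discount * next_max_log[i]
        targets.append(target)
    return targets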