def get_v(self, board: Board) -> np.ndarray:
    """
    Return the values of all moves available from the current state of `board`.

    The value table `self.v` is filled lazily: a state is only added the first
    time it is requested. The table is keyed by the *hashed* board state.

    :param board: The current board state
    :return: Array holding one value per board position for the next board states
    """
    state_key = board.hash_value()
    if state_key in self.v:
        # Already known: hand back the stored array itself (not a copy).
        return self.v[state_key]

    # First visit: every position starts at the default initial value.
    # 9 entries; the original comment equates vals.size with BOARD_SIZE.
    values = np.full(9, self.v_init)

    # Only wins and draws are marked. The player cannot end up in a losing
    # state directly after its own move, so losses need not be considered.
    for square in range(values.size):
        if not board.is_legal(square):
            continue
        # Try the move on a separate Board built from the current state.
        successor = Board(board.state)
        successor.move(square, self.side)
        if successor.check_win():
            values[square] = self.v_win
        elif successor.num_empty() == 0:
            # Not a win and no position left to play: the game is a draw.
            values[square] = self.v_draw

    # Remember the freshly built values for later lookups.
    self.v[state_key] = values
    return values
def move(self, board: Board):
    """
    Play one move on `board` and report the outcome.

    The hash of the board *before* the move is recorded in `self.move_history`
    together with the chosen move.

    :param board: The board to make a move on
    :return: The GameResult after this move, Flag to indicate whether the move finished the game
    """
    chosen = self.get_move(board)
    self.move_history.append((board.hash_value(), chosen))
    # board.move() yields three items; the first one is not needed here.
    _state, result, game_over = board.move(chosen, self.side)
    return result, game_over
def _min(self, board: Board) -> (float, int):
    """
    Evaluate the board position `board` from the Minimizing player's point of view.

    Results are memoized in `self.cache`, keyed by the board's hash value. Every
    computed result is stored exactly once before returning, including results
    for positions where the game has already finished.

    :param board: The board position to evaluate
    :return: Tuple of (Best Result, Best Move in this situation). Returns -1 for best move if the game
        has already finished
    """
    #
    # First we check if we have seen this board position before, and if yes just return the cached value
    #
    board_hash = board.hash_value()
    if board_hash in self.cache:
        return self.cache[board_hash]

    #
    # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
    # of a draw
    #
    min_value = self.DRAW_VALUE
    action = -1

    #
    # If the game has already finished we return. Otherwise we look at possible continuations
    #
    winner = board.who_won()
    if winner == self.side:
        min_value = self.WIN_VALUE
    elif winner == board.other_side(self.side):
        min_value = self.LOSS_VALUE
    else:
        # The list of empty positions is built before any move is tried.
        for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
            b = Board(board.state)
            b.move(index, board.other_side(self.side))

            res, _ = self._max(b)
            # `action == -1` makes sure the first continuation is always taken,
            # even if it is no better than the DRAW default.
            if res < min_value or action == -1:
                min_value = res
                action = index

                # Shortcut: Can't get better than that, so abort here and return this move
                if min_value == self.LOSS_VALUE:
                    break

    self.cache[board_hash] = (min_value, action)
    return min_value, action
def _min(self, board: Board) -> (float, int):
    """
    Evaluate the board position `board` from the Minimizing player's point of view.

    All equally good continuations are collected and cached under the board's
    hash value; one of them is picked at random on every call.

    :param board: The board position to evaluate
    :return: Tuple of (Best Result, Best Move in this situation), chosen at random among the equally
        good candidates. Returns -1 for best move if the game has already finished
    """
    #
    # First we check if we have seen this board position before, and if yes just return a random choice
    # from the cached values
    #
    board_hash = board.hash_value()
    if board_hash in self.cache:
        return random.choice(self.cache[board_hash])

    #
    # If the game has already finished we return. Otherwise we look at possible continuations
    #
    winner = board.who_won()
    if winner == self.side:
        best_moves = {(self.WIN_VALUE, -1)}
    elif winner == board.other_side(self.side):
        best_moves = {(self.LOSS_VALUE, -1)}
    else:
        #
        # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
        # of a draw
        #
        min_value = self.DRAW_VALUE
        action = -1
        best_moves = {(min_value, action)}
        # The list of empty positions is built before any move is tried.
        for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
            b = Board(board.state)
            b.move(index, board.other_side(self.side))

            res, _ = self._max(b)
            if res < min_value or action == -1:
                # Strictly better (or the very first) continuation: restart the candidate set.
                min_value = res
                action = index
                best_moves = {(min_value, action)}
            elif res == min_value:
                # Equally good continuation: keep it as an additional candidate.
                best_moves.add((min_value, index))

    # random.choice() needs a sequence, so the candidate set is frozen into a tuple.
    best_moves = tuple(best_moves)
    self.cache[board_hash] = best_moves
    return random.choice(best_moves)
def get_move(self, board: Board) -> int:
    """
    Return the next move given the board `board` based on the current Q values.

    The position with the highest Q value is tried first. When that position is
    not legal, its Q value is overwritten with -1.0 and the search is repeated.
    NOTE: the overwrite happens in place on the array returned by `self.get_q`.

    :param board: The current board state
    :return: The next move based on the current Q values for the input state
    :raises ValueError: If no legal move is found within one attempt per Q value
    """
    board_hash = board.hash_value()  # type: int
    qvals = self.get_q(board_hash)  # type: np.ndarray
    # One attempt per entry is enough to reach a legal position as long as the
    # legal positions hold Q values above -1.0. Bounding the loop prevents an
    # endless search when the board offers no legal move at all.
    for _ in range(len(qvals)):
        m = int(np.argmax(qvals))  # plain int, as promised by the annotation
        if board.is_legal(m):
            return m
        qvals[m] = -1.0
    raise ValueError(
        "No legal move available for board state {}".format(board_hash)
    )
def move(self, board: Board):
    """
    Play one move on `board` and report the outcome.

    Before the move is chosen, `self.move_strategy` is set: a uniform random
    draw that does not exceed `self.epsilon` selects EXPLORATION (explore the
    unknown), anything else selects EXPLOITATION (exploit the known).

    :param board: The board to make a move on
    :return: The GameResult after this move, Flag to indicate whether the move finished the game
    """
    explore = np.random.uniform(0, 1) <= self.epsilon
    self.move_strategy = (
        MoveStrategy.EXPLORATION if explore else MoveStrategy.EXPLOITATION
    )

    chosen = self.get_move(board)
    # The hash of the board *before* the move is stored with the chosen move.
    self.move_history.append((board.hash_value(), chosen))
    # backup_value() runs after the history entry is added and before the
    # move is actually played on the board.
    self.backup_value()

    # board.move() yields three items; the first one is not needed here.
    _state, result, game_over = board.move(chosen, self.side)
    return result, game_over